filtered_trans <- demographics %>%
left_join(transactions) %>%
mutate(transaction_quarter = quarter(transaction_timestamp, type = "year.quarter"))
## Joining, by = "household_id"
full_view <- filtered_trans %>%
inner_join(products, key = "product_id") %>%
inner_join(coupon_redemptions) %>%
inner_join(coupons) %>%
inner_join(campaign_descriptions) %>%
inner_join(campaigns)
## Joining, by = "product_id"
## Joining, by = "household_id"
## Joining, by = c("product_id", "coupon_upc", "campaign_id")
## Joining, by = "campaign_id"
## Joining, by = c("household_id", "campaign_id")
savings_view <- full_view %>%
dplyr::mutate(coupon_savings = (coupon_disc + coupon_match_disc)) %>%
dplyr::mutate(customer_amount = (sales_value - (coupon_savings))) %>%
# dplyr::mutate(campaign_id = as.integer(campaign_id)) %>%
group_by(transaction_quarter, week, household_id, household_size, campaign_id, campaign_type, age, income, marital_status, quantity, sales_value) %>%
summarise(total_saving = sum(coupon_savings))
## `summarise()` has grouped output by 'transaction_quarter', 'week',
## 'household_id', 'household_size', 'campaign_id', 'campaign_type', 'age',
## 'income', 'marital_status', 'quantity'. You can override using the `.groups`
## argument.
test1 <- full_view %>%
count(campaign_type, household_size) %>%
group_by(household_size) %>%
mutate(pct = n/ sum(n))
test2 <- savings_view %>%
count(campaign_type, household_size) %>%
group_by(household_size) %>%
mutate(pct = n/ sum(n))
test3 <- full_view %>% #table by age
count(campaign_type, age) %>%
group_by(age) %>%
mutate(pct = n/ sum(n))
test1 %>%
ggplot(aes(household_size, fill = campaign_type)) +
geom_col(aes(household_size, pct)) +
labs(title = "Campaigns per household (percentage)", subtitle = "subtitle") +
theme_wsj() +
theme(text = element_text(size = 7), axis.text = element_text(size = 7),
plot.title = element_text(size = 14), plot.subtitle = element_text(size = 9)) +
scale_y_continuous()

test2 %>%
ggplot(aes(household_size, fill = campaign_type)) +
geom_col(aes(household_size, pct)) +
labs(title = "Campaigns per household (percentage)", subtitle = "subtitle") +
theme_wsj() +
theme(text = element_text(size = 7), axis.text = element_text(size = 7),
plot.title = element_text(size = 14), plot.subtitle = element_text(size = 9)) +
scale_y_continuous()

test1
## # A tibble: 15 × 4
## # Groups: household_size [5]
## campaign_type household_size n pct
## <ord> <ord> <int> <dbl>
## 1 Type A 1 17214 0.980
## 2 Type A 2 24300 0.976
## 3 Type A 3 9392 0.965
## 4 Type A 4 6105 0.939
## 5 Type A 5+ 5893 0.973
## 6 Type B 1 286 0.0163
## 7 Type B 2 489 0.0196
## 8 Type B 3 277 0.0285
## 9 Type B 4 357 0.0549
## 10 Type B 5+ 145 0.0240
## 11 Type C 1 61 0.00347
## 12 Type C 2 102 0.00410
## 13 Type C 3 60 0.00617
## 14 Type C 4 37 0.00569
## 15 Type C 5+ 16 0.00264
test3
## # A tibble: 18 × 4
## # Groups: age [6]
## campaign_type age n pct
## <ord> <ord> <int> <dbl>
## 1 Type A 19-24 1198 0.958
## 2 Type A 25-34 10598 0.954
## 3 Type A 35-44 16282 0.967
## 4 Type A 45-54 27033 0.977
## 5 Type A 55-64 4012 0.989
## 6 Type A 65+ 3781 0.991
## 7 Type B 19-24 41 0.0328
## 8 Type B 25-34 456 0.0410
## 9 Type B 35-44 509 0.0302
## 10 Type B 45-54 484 0.0175
## 11 Type B 55-64 39 0.00962
## 12 Type B 65+ 25 0.00655
## 13 Type C 19-24 11 0.0088
## 14 Type C 25-34 58 0.00522
## 15 Type C 35-44 47 0.00279
## 16 Type C 45-54 145 0.00524
## 17 Type C 55-64 5 0.00123
## 18 Type C 65+ 10 0.00262
test3 %>%
ggplot(aes(age, fill = campaign_type)) +
geom_col(aes(age, pct)) +
labs(title = "Campaigns per age group (percentage)", subtitle = "subtitle") +
theme_wsj() +
theme(text = element_text(size = 7), axis.text = element_text(size = 7),
plot.title = element_text(size = 14), plot.subtitle = element_text(size = 9)) +
scale_y_continuous()

full_view %>%
count(campaign_type, household_size) %>%
mutate(pct = n / sum(n)) %>% # convert to proportions
ggplot(aes(reorder(campaign_type, pct), pct)) +
geom_col() +
coord_flip() + # now x becomes y
labs(x = "campaign type", y = "Relative frequency") +
scale_y_continuous(labels = scales::percent) +
ggtitle("Percent of whole")

filtered_trans %>% #plot of all transactions
select(sales_value, quantity, household_size) %>%
filter(quantity < 40000, sales_value < 400) %>%
ggplot(aes(x = quantity, y = sales_value, color = household_size)) +
geom_point() +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by household size", subtitle = "OMG")

filtered_trans %>% #plot of all transactions
select(sales_value, quantity, age) %>%
filter(quantity < 40000, sales_value < 400) %>%
ggplot(aes(x = quantity, y = sales_value, color = age)) +
geom_point() +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by age", subtitle = "OMG")

savings_view %>%
ggplot(aes(x = quantity, y = sales_value, color = household_size)) +
geom_point() +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by household size", subtitle = "OMG")

savings_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
group_by(household_size, total_saving, transaction_quarter) %>%
ggplot(aes(y = total_saving, x = transaction_quarter, fill = household_size)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "savings by household size", subtitle = "OMG")

savings_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_saving, x = transaction_quarter, fill = age)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "savings by age", subtitle = "OMG")

savings_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_saving, x = transaction_quarter, fill = marital_status)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "savings by marital status", subtitle = "OMG")

savings_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_saving, x = transaction_quarter, fill = income)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "savings by income", subtitle = "OMG")

savings_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_saving, x = week, color = campaign_type)) +
geom_point(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "savings by campaign", subtitle = "OMG")
## Warning: Width not defined. Set with `position_dodge(width = ?)`

#spend
spend_view <- full_view %>%
dplyr::mutate(coupon_savings = (coupon_disc + coupon_match_disc)) %>%
dplyr::mutate(customer_amount = (sales_value - (coupon_savings))) %>%
# dplyr::mutate(campaign_id = as.integer(campaign_id)) %>%
group_by(transaction_quarter, week, household_id, household_size, campaign_id, campaign_type, age, income, marital_status) %>%
summarise(total_spend = sum(customer_amount))
## `summarise()` has grouped output by 'transaction_quarter', 'week',
## 'household_id', 'household_size', 'campaign_id', 'campaign_type', 'age',
## 'income'. You can override using the `.groups` argument.
spend_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = transaction_quarter, fill = household_size)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by household size", subtitle = "OMG")

spend_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = transaction_quarter, fill = age)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by age", subtitle = "OMG")

spend_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = transaction_quarter, fill = marital_status)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by marital status", subtitle = "OMG")

spend_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = transaction_quarter, fill = income)) +
geom_bar(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by income", subtitle = "OMG")

spend_view %>%
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = week, color = campaign_type)) +
geom_point(position = 'dodge', stat = 'identity') +
scale_y_continuous(labels = scales::dollar) +
labs(title = "spend by campaign", subtitle = "OMG")
## Warning: Width not defined. Set with `position_dodge(width = ?)`

spend_view %>% #line view
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = week)) +
geom_line(data = spend_view, aes(week, total_spend, color = income)) +
scale_y_continuous(labels = scales::dollar) +
theme(legend.justification = 'centre',
legend.position = 'bottom',
legend.direction = "horizontal",
legend.key.height = unit(0.5, "cm"),
legend.key.width = unit(0.5,"cm")) +
labs(title = "spend by income", subtitle = "OMG")

spend_view %>% #line view
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = total_spend, x = week)) +
geom_line(data = spend_view, aes(week, total_spend, color = household_size)) +
scale_y_continuous(labels = scales::dollar) +
theme(legend.justification = 'centre',
legend.position = 'bottom',
legend.direction = "horizontal",
legend.key.height = unit(0.5, "cm"),
legend.key.width = unit(0.5,"cm")) +
labs(title = "spend by household size", subtitle = "OMG")

spend_view %>% #point
filter(household_id != 1228) %>%
arrange(campaign_id) %>%
ggplot(aes(y = household_size, x = age)) +
geom_point(data = spend_view, aes(age, household_size)) +
labs(title = "age vs household size", subtitle = "OMG")
