R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Fetch the two data sets through the completejourney getter functions.
# `transactions` feeds every analysis section below; `promotions` is loaded
# here but is not referenced in the visible part of this document.
transactions <- get_transactions()
promotions <- get_promotions()
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Keep the freshly loaded transactions under a second name. Nothing in the
# visible part of this document reads it -- presumably a backup in case
# `transactions` is modified later; confirm before removing.
original_transactions <- transactions
# 1 -------------------------------------------------------------------------------

library(dplyr)
library(ggplot2)

# Question 1: distinct transactions relative to household size, by income.

# Count the distinct transaction timestamps recorded for each household.
distinct_transactions <- transactions %>%
  group_by(household_id) %>%
  summarise(distinct_transactions = n_distinct(transaction_timestamp))

# Attach demographics. Households with no demographics match keep their row
# but carry NA in every demographic column after the left join.
merged_data <- distinct_transactions %>%
  left_join(demographics, by = "household_id")

# `household_size` is not numeric, so mean() on it returns NA (with a
# warning) for every income group. Parse it to a number before averaging.
# Stripping "+" assumes open-ended labels such as "5+" are treated as their
# lower bound -- TODO confirm against the levels in `demographics`.
# Rows without demographics are dropped so no NA income group is aggregated.
result <- merged_data %>%
  filter(!is.na(income), !is.na(household_size)) %>%
  mutate(
    household_size_num = as.numeric(
      sub("+", "", as.character(household_size), fixed = TRUE)
    )
  ) %>%
  group_by(income) %>%
  summarise(
    avg_household_size = mean(household_size_num, na.rm = TRUE),
    total_distinct_transactions = sum(distinct_transactions)
  )

# Normalise each income group's transaction count by its average size.
result <- result %>%
  mutate(
    transactions_per_size = total_distinct_transactions / avg_household_size
  )

# Plot the normalised figure so the chart matches its title; the raw total
# stays available in `result$total_distinct_transactions`.
ggplot(result, aes(x = income, y = transactions_per_size)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Total Distinct Transactions per Household Size, by Income Range",
       x = "Income Range",
       y = "Distinct Transactions per Average Household Size")

# 2 ------------------------------------------------------------------------------

# Question 2: average sales value by income range.

# Every transactions row with the household's demographics attached. The
# left join keeps rows for households without demographics; their income is
# NA and they end up in their own NA group below.
merged_data <- transactions %>%
  left_join(demographics, by = "household_id")

# Step 1: mean sales value per household (income is carried along as a
# grouping key). `.groups = "drop"` returns an ungrouped tibble explicitly
# instead of relying on summarise()'s implicit grouped output and message.
# NOTE(review): mean(sales_value) averages over rows of `transactions`. If a
# row is a single line item rather than a whole transaction, this is a
# per-row average, not a per-transaction one -- confirm the intended unit.
avg_sales_per_transaction_per_household <- merged_data %>%
  group_by(income, household_id) %>%
  summarise(
    avg_sales_per_transaction = mean(sales_value),
    .groups = "drop"
  )

# Step 2: average the household means within each income range, so each
# household counts once regardless of how many rows it has.
avg_sales_per_transaction_by_income <-
  avg_sales_per_transaction_per_household %>%
  group_by(income) %>%
  summarise(avg_sales_per_transaction = mean(avg_sales_per_transaction))

# NOTE(review): the y-axis label states "10's of dollars"; the plotted value
# is the unscaled mean of `sales_value` -- verify the unit before publishing.
ggplot(avg_sales_per_transaction_by_income,
       aes(x = income, y = avg_sales_per_transaction)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Average Sales per Transaction per Household by Income Range",
       x = "Income Range",
       y = "Average Sales Value per Transaction (10's of dollars)")

# 3 -------------------------------------------------------------------------------
# Question 3: top five products by sales value among discounted purchases.

# Keep only rows where a coupon discount was recorded.
# NOTE(review): the chart title refers to promotions "in the Same Week", but
# this filter uses `coupon_disc` and never consults `promotions` -- confirm
# which definition of "promotion" is intended.
promotion_transactions <- transactions %>%
  filter(coupon_disc > 0)

# Total sales value per product, largest first.
product_sales <- promotion_transactions %>%
  group_by(product_id) %>%
  summarise(total_sales_value = sum(sales_value)) %>%
  arrange(desc(total_sales_value))

# slice_max() replaces the superseded top_n(); like top_n() it keeps ties by
# default, so more than five rows are possible if the fifth value is tied.
top_5_products <- product_sales %>%
  slice_max(total_sales_value, n = 5)

# product_id is converted to character so it is treated as a label, and the
# bars are ordered from highest to lowest sales value.
ggplot(top_5_products,
       aes(x = reorder(as.character(product_id), -total_sales_value),
           y = total_sales_value)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Top 5 Products by Sales Value with Promotion in the Same Week",
       x = "Product ID",
       y = "Total Sales Value") +
  # Rotate x-axis labels for better readability.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))