Plot 1 - Comparison of total sales between married and unmarried households
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
transactions <- transactions_sample

# Plot 1: total sales for married vs. unmarried households.
#
# Only the two groups the chart compares (and the fill scale defines) are
# kept. Dropping NA alone is not enough: any other marital_status level
# (e.g. an "Unknown" category) would be drawn as an extra bar with no
# assigned fill colour. `%in%` is FALSE for NA, so missing values are
# excluded as well.
demographics %>%
  filter(marital_status %in% c("Married", "Unmarried")) %>%
  # left_join keeps households without transactions; their NA sales_value
  # is ignored by na.rm = TRUE below.
  left_join(transactions, by = "household_id") %>%
  group_by(marital_status) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  ggplot(aes(x = marital_status, y = total_sales, fill = marital_status)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(alpha = 0.5) +
  scale_fill_manual(values = c("Married" = "blue", "Unmarried" = "red")) +
  labs(
    title = "Total Sales by Marital Status",
    x = "Marital Status",
    y = "Total Sales Value"
  ) +
  theme_minimal()
Plot 2 - Total Sales by Transaction Hour
# Plot 2: total sales value per hour of day.
#
# Sums sales_value by the hour extracted from transaction_timestamp and
# draws the hourly totals as a line with point markers, overlaid with a
# dotted loess smoother.
transactions %>%
  filter(!is.na(sales_value)) %>%
  mutate(Transaction_Hour = hour(transaction_timestamp)) %>%
  group_by(Transaction_Hour) %>%
  summarise(Total_sales = sum(sales_value)) %>%
  ggplot(aes(x = Transaction_Hour, y = Total_sales)) +
  # Line widths use `linewidth`; `size` for lines is deprecated since
  # ggplot2 3.4.0. `size` remains correct for points.
  geom_line(color = "#0073C2", linewidth = 1.5) +
  geom_point(
    color = "#EFC000",
    size = 2,
    shape = 21,
    fill = "white",
    stroke = 2
  ) +
  # The formula is spelled out (same as the default) so ggplot2 does not
  # print its "using formula" message.
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    se = FALSE,
    color = "#696969",
    linetype = "dotted",
    linewidth = 1
  ) +
  labs(
    title = "Total Sales by Transaction Hour",
    x = "Transaction Hour",
    y = "Total Sales Value"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    panel.grid.major = element_line(
      linewidth = 0.5,
      linetype = "solid",
      color = "#EAEAEA"
    ),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.3, face = "bold", color = "#333333"),
    axis.title.x = element_text(face = "bold", color = "#333333"),
    axis.title.y = element_text(face = "bold", color = "#333333")
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
Plot 3 - Which display location has the highest sales value?
# Plot 3: total sales value by promotion display location.
#
# Transactions are matched to the promotions that applied to the same
# product in the same store, then sales are totalled per display location.
promotions <- promotions_sample %>%
  filter(!is.na(display_location))

transactions <- transactions_sample

# Join on the real key columns rather than a pasted product/store string.
# `week` is part of the key: without it every transaction is matched to the
# promotions of that product/store from every week, which duplicates rows
# and inflates the totals.
# NOTE(review): assumes both tables carry a `week` column, as described in
# the completejourney data dictionary - confirm.
# `relationship` is left at its default so dplyr warns if an unexpected
# many-to-many fan-out still occurs.
merged <- inner_join(
  transactions,
  promotions,
  by = c("product_id", "store_id", "week")
)

result <- merged %>%
  group_by(display_location) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  arrange(desc(total_sales))

# Bars are ordered by descending total sales; group = 1 lets the smoother
# and points treat the ordered categories as a single series.
ggplot(
  result,
  aes(
    x = reorder(display_location, -total_sales),
    y = total_sales,
    group = 1
  )
) +
  geom_col(fill = "skyblue", width = 0.7, alpha = 0.8) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    se = FALSE,
    color = "#002f6c",
    linewidth = 2,
    linetype = "solid"
  ) +
  geom_point(color = "#fdae61", size = 3) +
  labs(
    title = "Total Sales by Display Location with Trend Line",
    x = "Display Location",
    y = "Total Sales Value"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    axis.text.x = element_text(angle = 35, hjust = 1, vjust = 1, size = 13),
    axis.title.x = element_text(face = "bold", size = 15),
    axis.title.y = element_text(face = "bold", size = 15),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 19),
    panel.grid.major = element_line(
      linewidth = 0.5,
      linetype = "solid",
      color = "#ededed"
    )
  )
## `geom_smooth()` using formula = 'y ~ x'