# Load necessary libraries

# Attach the packages this script uses: dplyr supplies the pipe and the data
# verbs (inner_join, filter, group_by, summarise, ...); ggplot2 draws the plots.
# NOTE(review): tidyr and completejourney are not referenced in the section
# reviewed here; they are kept in case other code relies on them.
library(dplyr)
library(ggplot2)
library(tidyr)
library(completejourney)

# Demographics dataset

# Fix the RNG seed so every simulated value in this script is reproducible.
set.seed(123)

# Simulated customer demographics: one row per customer, 1000 customers.
# Columns are drawn in the order listed, which determines the random stream.
demographics <- data.frame(
  customer_id = 1:1000,
  gender = sample(c("Male", "Female"), 1000, replace = TRUE),
  income_group = sample(c("Low", "Medium", "High"), 1000, replace = TRUE),
  age = sample(12:80, 1000, replace = TRUE),
  screen_time = rnorm(1000, mean = 4, sd = 1) # Simulated screen time in hours
)

# Transactions dataset

# Simulated purchase log: 5000 transactions, each assigned to a random
# customer id in 1..1000 (the same id range used for customer_id in the
# demographics table).
transactions <- data.frame(
  transaction_id = 1:5000,
  customer_id = sample(1:1000, 5000, replace = TRUE),
  product_category = sample(
    c("Coffee", "Electronics", "Groceries", "Clothing"),
    5000,
    replace = TRUE
  ),
  sales_value = runif(5000, 5, 500), # Simulated sales value in dollars
  quantity = sample(1:5, 5000, replace = TRUE)
)

# Data 1: Income by Gender

# Join transactions and demographics, then aggregate total sales value by income group and gender

# Total sales value for every (income_group, gender) pair, largest first.
# Each transaction is matched to its customer's demographics; rows without an
# income group or gender are dropped before aggregating.
income_gender_data <- transactions %>%
  inner_join(demographics, by = "customer_id") %>%
  filter(!is.na(income_group), !is.na(gender)) %>%
  group_by(income_group, gender) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    # Return an ungrouped result so later verbs are not silently applied
    # per income_group (also silences the grouping message).
    .groups = "drop"
  ) %>%
  arrange(desc(total_sales_value))

# Plot: Income by Gender

# Grouped bar chart: within each income group, one bar per gender; bar height
# is the pre-aggregated total_sales_value (geom_col plots the values as given).
ggplot(
  income_gender_data,
  aes(x = income_group, y = total_sales_value, fill = gender)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Total Sales Value by Income Group and Gender",
    subtitle = "Comparing sales value across different income groups and genders",
    x = "Income Group",
    y = "Total Sales Value",
    fill = "Gender"
  ) +
  theme_minimal() +
  # Tilt the x-axis labels by 45 degrees.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Data 2: Screen Time by Age

# Categorize customers into Teenagers (12-19) and Adults (20+)

# Add an age_group label to every customer: "Teenager" for ages 12-19
# inclusive, "Adult" otherwise. Any age below 12 would also be labelled
# "Adult"; the simulated ages are drawn from 12:80, so that case does not occur.
age_groups <- demographics %>%
  mutate(
    age_group = ifelse(age >= 12 & age <= 19, "Teenager", "Adult")
  )

# Filter and calculate average screen time by age group

# Mean screen time (hours) per age group. Rows with a missing screen_time are
# removed first, so the mean needs no na.rm handling.
screen_time_data <- age_groups %>%
  filter(!is.na(screen_time)) %>%
  group_by(age_group) %>%
  summarise(avg_screen_time = mean(screen_time))

# Plot: Screen Time by Age Group

# Bar chart of average screen time per age group. The fill colour repeats the
# x-axis category, so the legend is suppressed.
ggplot(
  screen_time_data,
  aes(x = age_group, y = avg_screen_time, fill = age_group)
) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Average Screen Time by Age Group",
    subtitle = "Comparing average screen time between teenagers and adults",
    x = "Age Group",
    y = "Average Screen Time (in hours)"
  ) +
  theme_minimal()

# Data 3: Coffees Purchased by Gender

# Filter Data

# Number of coffee transactions per gender. This counts transaction rows, not
# the summed quantity column. Filtering to "Coffee" before the join keeps the
# join small and yields the same rows as filtering afterwards.
coffee_data <- transactions %>%
  filter(product_category == "Coffee") %>%
  inner_join(demographics, by = "customer_id") %>%
  count(gender, name = "total_coffees_purchased")

# Plot

# Bar chart of coffee purchase counts per gender. The fill colour repeats the
# x-axis category, so the legend is suppressed.
ggplot(
  coffee_data,
  aes(x = gender, y = total_coffees_purchased, fill = gender)
) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Coffees Purchased by Gender",
    subtitle = "Total number of coffee purchases by gender",
    x = "Gender",
    y = "Total Coffees Purchased"
  ) +
  theme_minimal()