library(tidyverse)
library(readxl)
library(scales)
library(lubridate)
load("C:/Users/riaku/Downloads/webshop.RData")
library(readxl)
income_statement <- read_excel("C:/Users/riaku/Downloads/Income_statement.xlsx")
library(readxl)
promotions <- read_excel("C:/Users/riaku/Downloads/promotions.xlsx")
library(readxl)
prices <- read_excel("C:/Users/riaku/Downloads/Prices.xlsx")
# joining visits with geographical data to understand the customer locations;
customer_geography <- visits %>%
left_join(geo, by = "VisitID") %>%
left_join(purchases, by = "VisitID") %>%
filter(!is.na(transactionID)) # Only customers who purchased
# countries analysis
country_summary <- customer_geography %>%
group_by(Country) %>%
summarise(
customers = n_distinct(VisitID),
transactions = n_distinct(transactionID),
total_revenue = sum(Payment),
avg_order_value = mean(Payment),
.groups = 'drop') %>%
arrange(desc(total_revenue)) %>%
mutate(
revenue_dt = (total_revenue / sum(total_revenue)) * 100,
customer_dt = (customers / sum(customers)) * 100)
# city analysis (top cities);
city_summary <- customer_geography %>%
group_by(City, Country) %>%
summarise(
customers = n_distinct(VisitID),
transactions = n_distinct(transactionID),
total_revenue = sum(Payment),
avg_order_value = mean(Payment),
.groups = 'drop') %>%
arrange(desc(total_revenue)) %>%
head(15)
##graphing this data
p1 <- country_summary %>%
head(10) %>%
ggplot(aes(x = reorder(Country, total_revenue), y = total_revenue)) +
geom_col(fill = "steelblue") +
geom_text(aes(label = dollar(total_revenue, accuracy = 1)),
hjust = -0.1, size = 3.5) +
coord_flip() +
scale_y_continuous(labels = dollar_format(),
expand = expansion(mult = c(0, 0.12))) +
labs(title = "Top 10 Countries by Revenue",
subtitle = "Where is Rita's webshop making money?",
x = NULL,
y = "Total Revenue") +
theme_minimal(base_size = 12) +
theme(panel.grid.major.y = element_blank())
p1
p1