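The dataset contains 51,290 records of orders placed with Global Superstore, a fictional company, between 1 January 2011 and 31 December 2014. Each record includes, among other fields, the order date, customer segment, market, product category and sub-category, sales and profit.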
Technology is the most profitable product category, followed by Office Supplies and finally Furniture. This ranking has remained consistent across all four years.
Although Furniture is a profitable category overall, Global Superstore has recorded a loss from the sale of tables each year.
Among the ten most frequently ordered product sub-categories (the set the sub-category chart is restricted to), phones, chairs and accessories are the most profitable.
The Asia-Pacific region is the store's most profitable market, bringing in over $436,000 in profit over the four years.
Sales show a yearly pattern of dips around October and July.
Data source: https://www.kaggle.com/datasets/shekpaul/global-superstore
Code
#Loading useful packages
library(tidyverse)
library(RColorBrewer)
library(scales)
library(extrafont)
# Default theme ----
# Installs the ggplot2 theme used by every chart below: theme_classic() with
# bold Corbel titles, caption and axis titles, a centred title and subtitle,
# size-11 axis tick labels and light grey major grid lines.
# Wrapped in local() so the helpers do not leak into the global environment.
invisible(local({
  # Bold Corbel text element; extra settings (hjust, color) pass through `...`.
  corbel_bold <- function(size, ...) {
    element_text(family = "Corbel", face = "bold", size = size, ...)
  }
  tick_labels <- element_text(size = 11)

  theme_set(
    theme_classic() +
      theme(
        plot.title = corbel_bold(20, hjust = 0.5),
        plot.subtitle = corbel_bold(16, hjust = 0.5),
        plot.caption = corbel_bold(8),
        axis.title.x = corbel_bold(13, color = "black"),
        axis.title.y = corbel_bold(13, color = "black"),
        axis.text.x = tick_labels,
        axis.text.y = tick_labels,
        panel.grid.major = element_line(color = "grey90")
      )
  )
}))
# Load data ----
# Read the orders file from the working directory into GS and print a compact
# column overview. The charts below refer to dotted column names such as
# Order.Date and Sub.Category.
# NOTE(review): presumably these come from base read.csv() converting headers
# with spaces into syntactic names - confirm before switching to readr.
GS <- read.csv(file = "Global_Superstore.csv")
GS %>% glimpse()
# Customer segments ----
# Bar chart of the number of rows in GS per customer segment. Each bar is
# outlined in black and carries its comma-formatted count in bold white text
# just below the top of the bar.
ggplot(data = GS, mapping = aes(x = Segment, fill = Segment)) +
  geom_bar(linewidth = 1, colour = "black") +
  geom_text(
    mapping = aes(label = scales::comma(after_stat(count))),
    stat = "count",
    colour = "white",
    fontface = "bold",
    vjust = 1.5
  ) +
  scale_x_discrete(name = "Customer segment") +
  # The y axis text, line and ticks are blanked by the theme() call below, so
  # the limits here mainly fix the headroom above the tallest bar.
  scale_y_continuous(
    name = NULL,
    breaks = c(0, 10000, 20000),
    limits = c(0, 28000),
    expand = expansion(0)
  ) +
  scale_fill_manual(
    name = "Customer segment",
    values = c("#fa560a", "#a5bd08", "#089106")
  ) +
  labs(title = "Customer Segments") +
  theme(
    panel.grid.major = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "none"
  )
# Note: the Consumer segment makes up just over half of the rows. The bars
# count order records, not distinct customers.
# Most profitable product category ----
# Total profit per product category and order year, drawn as dodged bars with
# the value (in $K) printed in white inside each bar.
GS %>%
  mutate(
    # Fix the bar/legend order: Technology, Office Supplies, Furniture.
    Category = fct_relevel(
      as.factor(Category),
      "Technology", "Office Supplies", "Furniture"
    ),
    # Order.Date is parsed as day-month-year text. lubridate is called by
    # namespace so the script does not depend on library(tidyverse) attaching
    # it (only tidyverse >= 2.0.0 does).
    Order.Year = lubridate::year(lubridate::dmy(Order.Date))
  ) %>%
  group_by(Category, Order.Year) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message summarise() prints after a multi-column group_by().
  summarise(Total.Profit = sum(Profit), .groups = "drop") %>%
  ggplot(aes(x = Order.Year, y = Total.Profit, fill = Category)) +
  geom_col(
    colour = "black",
    linewidth = 1,
    position = position_dodge()
  ) +
  geom_text(
    aes(label = scales::number(Total.Profit,
                               scale = 1e-3,
                               suffix = "K",
                               prefix = "$")),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.9),
    vjust = 1.5,
    colour = "white",
    fontface = "bold",
    size = 4
  ) +
  # Fixed 0-250K range: ggplot2 drops bars that fall outside these limits, so
  # widen them if the data changes.
  scale_y_continuous(
    name = "Profit",
    labels = label_number(suffix = "K", prefix = "$", scale = 1e-3),
    limits = c(0, 2.5e5),
    breaks = seq(0, 2.5e5, 1e5),
    expand = expansion(0)
  ) +
  scale_x_continuous(name = "Year") +
  # Colours follow the releveled category order above.
  scale_fill_manual(values = c("#d13404", "#d1ca04", "#70b366")) +
  labs(title = "Technology products brought in the most profit each year") +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.text.y = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank()
  )
# Most profitable market ----
# Total profit per market, bars sorted from highest to lowest profit, with the
# value (in $K) printed in white inside each bar.
GS %>%
  mutate(Market = as.factor(Market)) %>%
  group_by(Market) %>%
  summarise(Total.Profit = sum(Profit)) %>%
  ggplot(aes(
    x = reorder(Market, -Total.Profit),
    y = Total.Profit,
    fill = Market
  )) +
  geom_col(colour = "black", linewidth = 1) +
  geom_text(
    aes(label = scales::number(Total.Profit,
                               scale = 1e-3,
                               suffix = "K",
                               prefix = "$")),
    # Lower-case "stack" is the documented position name.
    position = "stack",
    vjust = 1.5,
    colour = "white",
    fontface = "bold",
    size = 4
  ) +
  # Fixed 0-500K range: ggplot2 drops bars that fall outside these limits.
  scale_y_continuous(
    name = "Profit",
    labels = label_number(suffix = "K", prefix = "$", scale = 1e-3),
    limits = c(0, 5e5),
    breaks = seq(0, 5e5, 1e5),
    expand = expansion(0)
  ) +
  scale_x_discrete(name = "Market") +
  # Unnamed colours are matched by position to the levels of Market, which
  # as.factor() sorts alphabetically (not to the profit-sorted bar order).
  # Blue marks levels 2, 5 and 7.
  # NOTE(review): presumably those are the three markets named in the title -
  # verify against levels(as.factor(GS$Market)).
  scale_fill_manual(values = c("#7f8a82", "#1cb5ed", "#7f8a82",
                               "#7f8a82", "#1cb5ed", "#7f8a82",
                               "#1cb5ed")) +
  labs(title = "Global Superstore's most profitable market is the Asia-\nPacific region followed by the EU and US") +
  theme(
    # "none" (lower case) is the documented value and matches the other charts
    # in this script; the original "None" is not a documented value.
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank()
  )
# Top 10 most profitable sub-categories ----
# Total profit per product sub-category, limited to the ten sub-categories
# with the highest total profit and sorted from highest to lowest. The three
# most profitable bars are highlighted in green.
#
# The previous version selected sub-categories with fct_lump_n(Sub.Category, 10),
# which keeps the ten most frequently occurring sub-categories, not the ten
# most profitable ones. The ranking is now done on the summed profit itself.
GS %>%
  group_by(Sub.Category) %>%
  summarise(Total.Profit = sum(Profit), .groups = "drop") %>%
  # Rank 1 = highest profit; ties.method = "first" guarantees exactly 10 rows.
  mutate(Profit.Rank = rank(-Total.Profit, ties.method = "first")) %>%
  filter(Profit.Rank <= 10) %>%
  mutate(Top.Three = Profit.Rank <= 3) %>%
  ggplot(aes(
    x = reorder(Sub.Category, -Total.Profit),
    y = Total.Profit,
    fill = Top.Three
  )) +
  geom_col(colour = "black", linewidth = 1) +
  geom_text(
    aes(label = scales::number(Total.Profit,
                               scale = 1e-3,
                               suffix = "K",
                               prefix = "$")),
    position = "stack",
    vjust = 1.5,
    colour = "white",
    fontface = "bold",
    size = 4
  ) +
  # No fixed upper limit: a hard-coded cap would silently drop any bar that
  # exceeds it. Breaks stay at every $100K up to the data maximum.
  scale_y_continuous(
    name = "Profit",
    labels = label_number(suffix = "K", prefix = "$", scale = 1e-3),
    breaks = function(lims) seq(0, max(lims), by = 1e5),
    expand = expansion(mult = c(0, 0.05))
  ) +
  scale_x_discrete(name = "Sub-category") +
  # Colours are keyed on rank, not on alphabetical level position, so the
  # highlight always follows the actual top three.
  scale_fill_manual(values = c("TRUE" = "#19fc5d", "FALSE" = "#7f8a82")) +
  labs(title = "Top 10 most profitable product sub-categories") +
  theme(legend.position = "none")
# Note: an earlier version of this chart led to the conclusion that the store
# gets most of its profit from sales of Phones, Chairs and Accessories. That
# chart only covered the ten most frequently ordered sub-categories, so
# re-read the corrected chart before quoting that ranking.
# Sales across the years ----
# Line chart of total sales per calendar month, with a dashed blue vertical
# line at 1 October of each year from 2011 to 2014.
GS %>%
  mutate(
    # Parse the day-month-year text and snap each date to the first day of its
    # month. floor_date() replaces the earlier format-to-text-and-reparse round
    # trip, and lubridate is called by namespace so the script does not depend
    # on library(tidyverse) attaching it.
    Order.Month = lubridate::floor_date(
      lubridate::dmy(Order.Date),
      unit = "month"
    )
  ) %>%
  group_by(Order.Month) %>%
  summarise(Total.Sales = sum(Sales)) %>%
  ggplot(aes(x = Order.Month, y = Total.Sales)) +
  geom_line(linewidth = 1.1) +
  geom_vline(
    xintercept = as.Date(c("2011-10-01", "2012-10-01",
                           "2013-10-01", "2014-10-01")),
    linetype = "dashed",
    colour = "blue"
  ) +
  scale_x_date(
    name = "Month",
    date_breaks = "4 months",
    date_labels = "%b%Y"
  ) +
  scale_y_continuous(
    name = "Total Sales",
    labels = label_number(suffix = "K", prefix = "$", scale = 1e-3)
  ) +
  labs(title = "Annual dip in sales around October")
# Loss-making sub-categories by year ----
# Total profit per sub-category and order year, keeping only the combinations
# whose total is negative, drawn as downward red bars labelled in $K.
GS %>%
  # lubridate is called by namespace so the script does not depend on
  # library(tidyverse) attaching it (only tidyverse >= 2.0.0 does).
  mutate(Order.Year = lubridate::year(lubridate::dmy(Order.Date))) %>%
  group_by(Sub.Category, Order.Year) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message summarise() prints after a multi-column group_by().
  summarise(Total.Profit = sum(Profit), .groups = "drop") %>%
  filter(Total.Profit < 0) %>%
  ggplot(aes(x = Order.Year, y = Total.Profit, fill = Sub.Category)) +
  geom_col(colour = "black", linewidth = 1) +
  geom_text(
    aes(label = scales::number(Total.Profit,
                               scale = 1e-3,
                               suffix = "K",
                               prefix = "$")),
    # Bars point downwards, so a negative vjust lifts the label from the bar's
    # lower end back up into the bar.
    vjust = -1.2,
    colour = "white",
    fontface = "bold",
    size = 4
  ) +
  scale_x_continuous(name = "Year") +
  # Fixed -32K-0 range: ggplot2 drops bars that fall outside these limits.
  scale_y_continuous(
    name = "Loss",
    labels = label_number(suffix = "K", prefix = "$", scale = 1e-3),
    limits = c(-32000, 0),
    expand = expansion(0)
  ) +
  # Title typo fixed: the company is "Global Superstore".
  labs(title = "Global Superstore has recorded a loss from\nsales of tables each year") +
  # A single fill colour only works while exactly one sub-category survives the
  # filter above.
  # NOTE(review): presumably that sub-category is Tables, as the title says -
  # add more colours if others appear.
  scale_fill_manual(values = "#fa0a0a") +
  theme(legend.position = "none")
# Note: Tables are not bringing in any profit.