Description

The dataset contains 51,290 records of orders placed with Global Superstore, a fictional company, between 1 January 2011 and 31 December 2014. Each record contains information on:

  1. The date the order was placed
  2. Location from where it was placed
  3. Customer details
  4. Shipment information
  5. Profit the company made on that order
  6. Order return Status.

Visualisations

Technology is the most profitable product category, followed by Office Supplies and finally Furniture. This ranking has remained consistent across all four years.

Although Furniture is a profitable category overall, Global Superstore has recorded a loss on the sale of tables every year.

Phones, Chairs and Accessories are the most profitable product sub-categories.

The Asia-Pacific region is the most profitable market for the store, bringing in over $436,000 in profit over the four years.

Sales show a yearly pattern of dips around October and July.

Appendix

Data source: https://www.kaggle.com/datasets/shekpaul/global-superstore

Code

#Loading useful packages
library(tidyverse)
library(RColorBrewer)
library(scales)
library(extrafont)

# Global plot theme ----
# Make theme_classic() the session default, then layer the shared overrides on
# top of it: bold Corbel text for the title, subtitle, caption and axis titles,
# 11 pt axis tick labels, and light grey major grid lines.
# NOTE(review): assumes the "Corbel" font is available to the graphics device
# (presumably via extrafont) - confirm on the machine that renders the plots.
theme_set(theme_classic())
theme_update(
  # Centred headings
  plot.title = element_text(
    family = "Corbel", face = "bold", size = 20, hjust = 0.5
  ),
  plot.subtitle = element_text(
    family = "Corbel", face = "bold", size = 16, hjust = 0.5
  ),
  plot.caption = element_text(family = "Corbel", face = "bold", size = 8),
  # Axis titles and tick labels
  axis.title.x = element_text(
    family = "Corbel", face = "bold", size = 13, color = "black"
  ),
  axis.title.y = element_text(
    family = "Corbel", face = "bold", size = 13, color = "black"
  ),
  axis.text.x = element_text(size = 11),
  axis.text.y = element_text(size = 11),
  # Faint reference grid
  panel.grid.major = element_line(color = "grey90")
)

# Load the data ----
# Read the order records from the working directory into `GS`, then print a
# compact column-by-column overview of the result.
GS <- utils::read.csv(file = "Global_Superstore.csv")
dplyr::glimpse(GS)

# Customer segments ----
# Bar chart of the number of records in each customer segment. The
# comma-formatted count is printed in white just inside the top of each bar.
ggplot(data = GS, mapping = aes(x = Segment, fill = Segment)) +
  geom_bar(color = "black", linewidth = 1) +
  geom_text(
    mapping = aes(label = comma(after_stat(count))),
    stat = "count",
    color = "white",
    fontface = "bold",
    vjust = 1.5
  ) +
  scale_x_discrete(name = "Customer segment") +
  scale_y_continuous(
    name = NULL,
    breaks = c(0, 10000, 20000),
    limits = c(0, 28000),
    expand = expansion(mult = 0)
  ) +
  scale_fill_manual(
    name = "Customer segment",
    values = c("#fa560a", "#a5bd08", "#089106")
  ) +
  labs(title = "Customer Segments") +
  theme(
    # Counts are printed on the bars, so the y axis and grid are hidden; the
    # x axis already names each segment, so the legend is dropped as well.
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "none"
  )
# Note: Consumers account for just over half of the records

# Most profitable product category ----
# Dodged bars of total profit per product category for every order year, with
# the value (in $K) printed in white inside each bar.
GS %>%
  mutate(
    # Order dates are stored as day-month-year strings; only the year is used.
    Order.Year = year(dmy(Order.Date)),
    # Fix the bar/legend order: Technology, Office Supplies, Furniture.
    Category = fct_relevel(
      as.factor(Category),
      "Technology", "Office Supplies", "Furniture"
    )
  ) %>%
  group_by(Category, Order.Year) %>%
  summarise(Total.Profit = sum(Profit)) %>%
  ggplot(mapping = aes(x = Order.Year, y = Total.Profit, fill = Category)) +
  geom_col(
    position = position_dodge(),
    color = "black",
    linewidth = 1
  ) +
  geom_text(
    mapping = aes(
      label = number(Total.Profit, prefix = "$", suffix = "K", scale = 0.001)
    ),
    # Same dodge width as the bars so each label sits on its own bar.
    position = position_dodge(width = 0.9),
    color = "white",
    fontface = "bold",
    size = 4,
    vjust = 1.5
  ) +
  scale_x_continuous(name = "Year") +
  scale_y_continuous(
    name = "Profit",
    labels = label_number(prefix = "$", suffix = "K", scale = 0.001),
    breaks = c(0, 100000, 200000),
    limits = c(0, 250000),
    expand = expansion(mult = 0)
  ) +
  scale_fill_manual(values = c("#d13404", "#d1ca04", "#70b366")) +
  labs(title = "Technology products brought in the most profit each year") +
  theme(
    # Values are printed on the bars, so the y axis and grid are hidden.
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    legend.direction = "horizontal",
    legend.position = "bottom"
  )



# Most profitable market ----
# Total profit per market, with bars ordered from most to least profitable and
# the value (in $K) printed in white inside each bar. The three most
# profitable markets are drawn in blue and the remaining ones in grey.
GS %>%
  mutate(Market = as.factor(Market)) %>%
  group_by(Market) %>%
  summarise(Total.Profit = sum(Profit), .groups = "drop") %>%
  # Flag the top three by profit. Colouring by this flag (instead of by the
  # alphabetical position of each market) keeps the highlight correct in any
  # locale and for any number of markets.
  mutate(Highlight = rank(-Total.Profit, ties.method = "min") <= 3) %>%
  ggplot(aes(x = reorder(Market, -Total.Profit), y = Total.Profit,
             fill = Highlight)) +
  geom_col(colour = "black",
           linewidth = 1) +
  geom_text(aes(label = scales::number(Total.Profit,
                                       scale = 1e-3,
                                       suffix = "K",
                                       prefix = "$")),
            position = "stack",
            vjust = 1.5,
            colour = "white",
            fontface = "bold",
            size = 4) +
  scale_y_continuous(name = "Profit",
                     labels = label_number(suffix = "K",
                                           prefix = "$",
                                           scale = 1e-3),
                     limits = c(0, 5e5),
                     breaks = seq(0, 5e5, 1e5),
                     expand = expansion(0)) +
  scale_x_discrete(name = "Market") +
  scale_fill_manual(values = c("TRUE" = "#1cb5ed", "FALSE" = "#7f8a82")) +
  # Explicit "\n" line break: a literal line break inside the string would
  # carry the source indentation into the rendered title.
  labs(title = paste0("Global Superstore's most profitable market is the ",
                      "Asia-Pacific\nregion followed by the EU and US")) +
  # Values are printed on the bars, so the y axis, grid and legend are hidden.
  theme(legend.position = "none",
        axis.text.y = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major = element_blank())


# Top 10 most profitable sub-categories ----
# Total profit per product sub-category, keeping the ten sub-categories with
# the highest total profit, ordered from most to least profitable. The top
# three are drawn in green and the remaining seven in grey.
GS %>%
  group_by(Sub.Category) %>%
  summarise(Total.Profit = sum(Profit), .groups = "drop") %>%
  # Select on total profit. Lumping the raw factor with fct_lump_n() keeps the
  # ten most *frequent* sub-categories, which is not what the title promises.
  slice_max(Total.Profit, n = 10, with_ties = FALSE) %>%
  # Highlight by rank rather than by alphabetical position of the levels.
  mutate(Highlight = rank(-Total.Profit, ties.method = "min") <= 3) %>%
  ggplot(aes(x = reorder(Sub.Category, -Total.Profit), y = Total.Profit,
             fill = Highlight)) +
  geom_col(colour = "black",
           linewidth = 1) +
  geom_text(aes(label = scales::number(Total.Profit,
                                       scale = 1e-3,
                                       suffix = "K",
                                       prefix = "$")),
            position = "stack",
            vjust = 1.5,
            colour = "white",
            fontface = "bold",
            size = 4) +
  scale_y_continuous(name = "Profit",
                     labels = label_number(suffix = "K",
                                           prefix = "$",
                                           scale = 1e-3),
                     # Open upper limit: a fixed cap would silently drop any
                     # bar taller than the cap.
                     limits = c(0, NA),
                     breaks = seq(0, 2.5e5, 1e5),
                     expand = expansion(mult = c(0, 0.05))) +
  scale_x_discrete(name = "Sub-category") +
  scale_fill_manual(values = c("TRUE" = "#19fc5d", "FALSE" = "#7f8a82")) +
  labs(title = "Top 10 most profitable product sub-categories") +
  theme(legend.position = "none")
# NOTE(review): the earlier finding (Phones, Chairs and Accessories as the top
# three) came from the ten most frequently ordered sub-categories; re-check it
# against this profit-ranked plot.


# Sales across the years ----
# Line chart of total sales per calendar month, with a dashed blue marker at
# 1 October of each year from 2011 to 2014.
GS %>%
  mutate(
    Order.Date = dmy(Order.Date),
    # Collapse each date to its month: format as "YYYY-MM", then parse back
    # into a Date so the x axis stays a date scale.
    Order.Month = ym(format_ISO8601(Order.Date, precision = "ym"))
  ) %>%
  group_by(Order.Month) %>%
  summarise(Total.Sales = sum(Sales)) %>%
  ggplot(mapping = aes(x = Order.Month, y = Total.Sales)) +
  geom_line(linewidth = 1.1) +
  geom_vline(
    xintercept = seq(as.Date("2011-10-01"), by = "year", length.out = 4),
    color = "blue",
    linetype = "dashed"
  ) +
  scale_x_date(
    name = "Month",
    date_labels = "%b%Y",
    date_breaks = "4 months"
  ) +
  scale_y_continuous(
    name = "Total Sales",
    labels = label_number(prefix = "$", suffix = "K", scale = 0.001)
  ) +
  labs(title = "Annual dip in sales around October")

# Loss-making sub-categories ----
# Yearly total profit per sub-category, keeping only the sub-category/year
# combinations whose total is negative, drawn as downward red bars with the
# value (in $K) printed in white.
# NOTE(review): the single fill colour and the title assume Tables is the only
# sub-category with a negative yearly total; a second one would make
# scale_fill_manual() fail for lack of colours - confirm against the data.
GS %>%
  mutate(Order.Date = dmy(Order.Date),
         Order.Year = year(Order.Date)) %>%
  group_by(Sub.Category, Order.Year) %>%
  summarise(Total.Profit = sum(Profit), .groups = "drop") %>%
  filter(Total.Profit < 0) %>%
  ggplot(aes(x = Order.Year, y = Total.Profit, fill = Sub.Category)) +
  geom_col(colour = "black",
           linewidth = 1) +
  geom_text(aes(label = scales::number(Total.Profit,
                                       scale = 1e-3,
                                       suffix = "K",
                                       prefix = "$")),
            # Bars point downwards, so a negative vjust lifts the label from
            # the bar's lower end back up into the bar.
            vjust = -1.2,
            colour = "white",
            fontface = "bold",
            size = 4) +
  scale_x_continuous(name = "Year") +
  scale_y_continuous(name = "Loss",
                     labels = label_number(suffix = "K",
                                           prefix = "$",
                                           scale = 1e-3),
                     limits = c(-32000, 0),
                     expand = expansion(0)) +
  # Explicit "\n" line break: a literal line break inside the string would
  # carry the source indentation (and a trailing space) into the title.
  labs(title = paste0("Global Superstore has recorded a loss from\n",
                      "sales of tables each year")) +
  scale_fill_manual(values = "#fa0a0a") +
  theme(legend.position = "none")
# Note: Tables lose money in every year shown