Introduction

This project analyzes retail sales performance using the Sample Superstore dataset. The goal of the project is to identify patterns in sales, profit, discounts, shipping modes, and customer segments using a variety of visualizations.

The project includes eight visualizations using multiple chart types including:

Bar charts, scatter plots, line charts, box plots, heatmaps, and interactive Plotly visualizations.

Import Data

# Load readr, dplyr, and ggplot2 (via tidyverse) before their first use so
# this section runs on its own.
library(tidyverse)

# Import the Superstore orders. The file name uses the same lower-case ".csv"
# extension as the later imports in this document, so the path also resolves
# on case-sensitive file systems.
# NOTE(review): confirm the file on disk is named "Sample_Superstore.csv".
dat <- read_csv("Sample_Superstore.csv")
## Rows: 9994 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Segment, ...
## dbl  (5): Row ID, Sales, Quantity, Discount, Profit
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already returns a tibble (the preview below prints "# A tibble"),
# so no additional tibble() conversion is required.

# Preview the first six rows to confirm the import.
head(dat)
## # A tibble: 6 × 19
##   `Row ID` `Order ID` `Order Date` `Ship Date` `Ship Mode` `Customer ID` Segment
##      <dbl> <chr>      <chr>        <chr>       <chr>       <chr>         <chr>  
## 1        1 CA-2016-1… 11-08-2016   11-11-2016  Second Cla… CG-12520      Consum…
## 2        2 CA-2016-1… 11-08-2016   11-11-2016  Second Cla… CG-12520      Consum…
## 3        3 CA-2016-1… 06-12-2016   6/16/2016   Second Cla… DV-13045      Corpor…
## 4        4 US-2015-1… 10-11-2015   10/18/2015  Standard C… SO-20335      Consum…
## 5        5 US-2015-1… 10-11-2015   10/18/2015  Standard C… SO-20335      Consum…
## 6        6 CA-2014-1… 06-09-2014   6/14/2014   Standard C… BH-11710      Consum…
## # ℹ 12 more variables: Country <chr>, City <chr>, State <chr>, Region <chr>,
## #   `Product ID` <chr>, Category <chr>, `Sub-Category` <chr>,
## #   `Product Name` <chr>, Sales <dbl>, Quantity <dbl>, Discount <dbl>,
## #   Profit <dbl>

Visualization 1 — Sales by Region

# Aggregate sales to a single total per region.
region_sales <- summarise(
  group_by(dat, Region),
  Total_Sales = sum(Sales)
)

# Bar chart of the regional totals; each bar is filled by its region.
ggplot(data = region_sales,
       mapping = aes(x = Region, y = Total_Sales, fill = Region)) +
  geom_col() +
  labs(title = "Total Sales by Region", x = "Region", y = "Total Sales") +
  theme_minimal()

Visualization 2 — Sales vs Profit

# Scatter plot of profit against sales for every row of dat; the points are
# semi-transparent so dense regions remain readable.
ggplot(data = dat, mapping = aes(x = Sales, y = Profit)) +
  geom_point(colour = "blue", alpha = 0.5) +
  labs(title = "Sales vs Profit", x = "Sales", y = "Profit") +
  theme_minimal()

Visualization 3 — Sales by Category

# Aggregate sales to a single total per product category.
category_sales <- summarise(
  group_by(dat, Category),
  Total_Sales = sum(Sales)
)

# Bar chart of the category totals; each bar is filled by its category.
ggplot(data = category_sales,
       mapping = aes(x = Category, y = Total_Sales, fill = Category)) +
  geom_col() +
  labs(title = "Sales by Category", x = "Category", y = "Total Sales") +
  theme_minimal()

Visualization 4 — Profit by Ship Mode

# Box plot of profit for each shipping mode. The column name contains a space
# at this point in the document, so it has to be quoted with backticks.
ggplot(data = dat,
       mapping = aes(x = `Ship Mode`, y = Profit, fill = `Ship Mode`)) +
  geom_boxplot() +
  labs(title = "Profit Distribution by Ship Mode",
       x = "Ship Mode",
       y = "Profit") +
  theme_minimal()

Visualization 5 — Monthly Sales Trend

# Packages for data wrangling/plotting and for date parsing.
library(tidyverse)
library(lubridate)

# Re-read the raw file so the column names can be normalised below.
dat <- read_csv(file = "Sample_Superstore.csv")
## Rows: 9994 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Segment, ...
## dbl  (5): Row ID, Sales, Quantity, Discount, Profit
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the column names into syntactic R names (e.g. "Order Date" becomes
# "Order.Date") so they can be used without backticks.
names(dat) <- make.names(names(dat))

# Parse the month-day-year order date strings into Date values.
dat[["Order.Date"]] <- mdy(dat[["Order.Date"]])

# Preview the first six rows after the clean-up.
head(dat)
## # A tibble: 6 × 19
##   Row.ID Order.ID     Order.Date Ship.Date Ship.Mode Customer.ID Segment Country
##    <dbl> <chr>        <date>     <chr>     <chr>     <chr>       <chr>   <chr>  
## 1      1 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520    Consum… United…
## 2      2 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520    Consum… United…
## 3      3 CA-2016-138… 2016-06-12 6/16/2016 Second C… DV-13045    Corpor… United…
## 4      4 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335    Consum… United…
## 5      5 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335    Consum… United…
## 6      6 CA-2014-115… 2014-06-09 6/14/2014 Standard… BH-11710    Consum… United…
## # ℹ 11 more variables: City <chr>, State <chr>, Region <chr>, Product.ID <chr>,
## #   Category <chr>, Sub.Category <chr>, Product.Name <chr>, Sales <dbl>,
## #   Quantity <dbl>, Discount <dbl>, Profit <dbl>
# Order.Date is already a Date at this point (parsed with mdy() above), so the
# former as.Date(..., format = "%m/%d/%Y") re-conversion was redundant and has
# been dropped.

# Total sales per calendar month; the "%Y-%m" labels sort chronologically.
monthly_sales <- dat %>%
  mutate(Month = format(Order.Date, "%Y-%m")) %>%
  group_by(Month) %>%
  summarise(Total_Sales = sum(Sales))

# Month is a character column, so group = 1 joins all points into one line.
# theme_minimal() is a complete theme and replaces any theme() settings added
# before it, so the axis-label rotation must be added after it, not before.
ggplot(monthly_sales,
       aes(x = Month,
           y = Total_Sales,
           group = 1)) +
  geom_line(color = "darkgreen") +
  labs(
    title = "Monthly Sales Trend",
    x = "Month",
    y = "Total Sales"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Visualization 6 — Discount vs Profit

# Scatter plot of profit against discount for every row of dat; the points
# are semi-transparent so overlapping observations remain visible.
ggplot(data = dat, mapping = aes(x = Discount, y = Profit)) +
  geom_point(colour = "red", alpha = 0.5) +
  labs(title = "Discount vs Profit", x = "Discount", y = "Profit") +
  theme_minimal()

Visualization 7 — Heatmap of Sales by Category and Region

# Total sales for every Category x Region combination. .groups = "drop"
# returns an ungrouped tibble and silences the "grouped output" message that
# summarise() otherwise prints after a multi-column group_by().
heatmap_data <- dat %>%
  group_by(Category, Region) %>%
  summarise(Total_Sales = sum(Sales), .groups = "drop")

# One tile per Category x Region cell, shaded by total sales.
ggplot(heatmap_data,
       aes(x = Region,
           y = Category,
           fill = Total_Sales)) +
  geom_tile() +
  labs(
    title = "Heatmap of Sales by Category and Region",
    x = "Region",
    y = "Category"
  ) +
  theme_minimal()

Visualization 8 — Interactive Plotly Visualization

# Packages for data wrangling/plotting, interactive graphics, and date parsing.
library(tidyverse)
library(plotly)
library(lubridate)

# Re-read the raw file so the column names can be normalised below.
dat <- read_csv(file = "Sample_Superstore.csv")
## Rows: 9994 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Segment, ...
## dbl  (5): Row ID, Sales, Quantity, Discount, Profit
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the column names into syntactic R names (e.g. "Order Date" becomes
# "Order.Date") so they can be used without backticks.
names(dat) <- make.names(names(dat))

# Parse the month-day-year order date strings into Date values.
dat[["Order.Date"]] <- mdy(dat[["Order.Date"]])

# Preview the first six rows after the clean-up.
head(dat)
## # A tibble: 6 × 19
##   Row.ID Order.ID     Order.Date Ship.Date Ship.Mode Customer.ID Segment Country
##    <dbl> <chr>        <date>     <chr>     <chr>     <chr>       <chr>   <chr>  
## 1      1 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520    Consum… United…
## 2      2 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520    Consum… United…
## 3      3 CA-2016-138… 2016-06-12 6/16/2016 Second C… DV-13045    Corpor… United…
## 4      4 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335    Consum… United…
## 5      5 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335    Consum… United…
## 6      6 CA-2014-115… 2014-06-09 6/14/2014 Standard… BH-11710    Consum… United…
## # ℹ 11 more variables: City <chr>, State <chr>, Region <chr>, Product.ID <chr>,
## #   Category <chr>, Sub.Category <chr>, Product.Name <chr>, Sales <dbl>,
## #   Quantity <dbl>, Discount <dbl>, Profit <dbl>
# The column names were already made syntactic right after the import above,
# and make.names() leaves valid names unchanged, so the repeated call that
# used to sit here has been dropped.

# Static scatter plot of profit against sales, coloured by product category.
interactive_plot <- ggplot(dat,
                           aes(x = Sales,
                               y = Profit,
                               color = Category)) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Interactive Sales vs Profit Visualization",
    x = "Sales",
    y = "Profit"
  ) +
  theme_minimal()

# Convert the ggplot object into an interactive plotly widget.
ggplotly(interactive_plot)

Conclusion

This project analyzed retail sales data using multiple visualization techniques. The findings show how sales and profits vary across regions, categories, discounts, and shipping methods. Interactive and graphical analysis provides useful business insights that can support strategic decision-making and performance improvement.