This project analyzes retail sales performance using the Sample Superstore dataset. The goal of the project is to identify patterns in sales, profit, discounts, shipping modes, and customer segments using a variety of visualizations.
The project includes eight visualizations that use multiple chart types, including:
bar charts, scatter plots, line charts, box plots, heatmaps, and interactive Plotly visualizations.
# Load the Superstore orders. The extension is spelled in lower case so that
# this call matches the other read_csv() calls in this file; mixing ".CSV" and
# ".csv" only resolves on case-insensitive file systems.
# NOTE(review): confirm the spelling of the file on disk.
dat <- read_csv("Sample_Superstore.csv")
## Rows: 9994 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Segment, ...
## dbl (5): Row ID, Sales, Quantity, Discount, Profit
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# as_tibble() is the coercion helper for an existing data frame; tibble() is
# the column-wise constructor and only works here because unnamed data frame
# arguments are spliced.
dat <- as_tibble(dat)

# Preview the first rows to confirm the import.
head(dat)
## # A tibble: 6 × 19
## `Row ID` `Order ID` `Order Date` `Ship Date` `Ship Mode` `Customer ID` Segment
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 CA-2016-1… 11-08-2016 11-11-2016 Second Cla… CG-12520 Consum…
## 2 2 CA-2016-1… 11-08-2016 11-11-2016 Second Cla… CG-12520 Consum…
## 3 3 CA-2016-1… 06-12-2016 6/16/2016 Second Cla… DV-13045 Corpor…
## 4 4 US-2015-1… 10-11-2015 10/18/2015 Standard C… SO-20335 Consum…
## 5 5 US-2015-1… 10-11-2015 10/18/2015 Standard C… SO-20335 Consum…
## 6 6 CA-2014-1… 06-09-2014 6/14/2014 Standard C… BH-11710 Consum…
## # ℹ 12 more variables: Country <chr>, City <chr>, State <chr>, Region <chr>,
## # `Product ID` <chr>, Category <chr>, `Sub-Category` <chr>,
## # `Product Name` <chr>, Sales <dbl>, Quantity <dbl>, Discount <dbl>,
## # Profit <dbl>
# Sum Sales within each Region.
region_sales <- summarise(
  group_by(dat, Region),
  Total_Sales = sum(Sales)
)

# One bar per region; bar height is the pre-computed total, filled by region.
region_sales %>%
  ggplot(mapping = aes(x = Region, y = Total_Sales, fill = Region)) +
  geom_bar(stat = "identity") +
  labs(title = "Total Sales by Region", x = "Region", y = "Total Sales") +
  theme_minimal()
# Scatter plot of Profit against Sales, one semi-transparent blue point per row.
dat %>%
  ggplot(mapping = aes(x = Sales, y = Profit)) +
  geom_point(alpha = 0.5, color = "blue") +
  labs(title = "Sales vs Profit", x = "Sales", y = "Profit") +
  theme_minimal()
# Sum Sales within each product Category.
category_sales <- summarise(
  group_by(dat, Category),
  Total_Sales = sum(Sales)
)

# One bar per category; bar height is the pre-computed total, filled by category.
category_sales %>%
  ggplot(mapping = aes(x = Category, y = Total_Sales, fill = Category)) +
  geom_bar(stat = "identity") +
  labs(title = "Sales by Category", x = "Category", y = "Total Sales") +
  theme_minimal()
# Box plot of Profit for each shipping mode. The column is still called
# `Ship Mode` (with a space) at this point, hence the backticks.
dat %>%
  ggplot(mapping = aes(x = `Ship Mode`, y = Profit, fill = `Ship Mode`)) +
  geom_boxplot() +
  labs(
    title = "Profit Distribution by Ship Mode", x = "Ship Mode", y = "Profit"
  ) +
  theme_minimal()
library(tidyverse)
library(lubridate)
# Re-read the raw CSV so the following steps start from the original headers.
dat <- read_csv(file = "Sample_Superstore.csv")
## Rows: 9994 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Segment, ...
## dbl (5): Row ID, Sales, Quantity, Discount, Profit
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the headers to syntactic R names (e.g. "Order Date" -> "Order.Date").
names(dat) <- make.names(names(dat))

# Parse the order date strings as month-day-year dates.
dat[["Order.Date"]] <- mdy(dat[["Order.Date"]])

# Preview the cleaned data.
head(dat)
## # A tibble: 6 × 19
## Row.ID Order.ID Order.Date Ship.Date Ship.Mode Customer.ID Segment Country
## <dbl> <chr> <date> <chr> <chr> <chr> <chr> <chr>
## 1 1 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520 Consum… United…
## 2 2 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520 Consum… United…
## 3 3 CA-2016-138… 2016-06-12 6/16/2016 Second C… DV-13045 Corpor… United…
## 4 4 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335 Consum… United…
## 5 5 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335 Consum… United…
## 6 6 CA-2014-115… 2014-06-09 6/14/2014 Standard… BH-11710 Consum… United…
## # ℹ 11 more variables: City <chr>, State <chr>, Region <chr>, Product.ID <chr>,
## # Category <chr>, Sub.Category <chr>, Product.Name <chr>, Sales <dbl>,
## # Quantity <dbl>, Discount <dbl>, Profit <dbl>
# Order.Date was already parsed with mdy() after loading, so a second
# as.Date(..., format = "%m/%d/%Y") conversion is unnecessary (and would turn
# the dash-separated raw date strings into NA if it ever ran on them).
# Fail loudly instead if the column is not a Date.
stopifnot(inherits(dat$Order.Date, "Date"))

# Total sales per calendar month; "YYYY-MM" labels sort chronologically.
monthly_sales <- dat %>%
  mutate(Month = format(Order.Date, "%Y-%m")) %>%
  group_by(Month) %>%
  summarise(Total_Sales = sum(Sales))

# Month is a character label, so group = 1 is needed to join all points into
# a single line.
ggplot(monthly_sales, aes(x = Month, y = Total_Sales, group = 1)) +
  geom_line(color = "darkgreen") +
  labs(
    title = "Monthly Sales Trend",
    x = "Month",
    y = "Total Sales"
  ) +
  # theme_minimal() is a complete theme and overrides any theme() settings
  # added before it, so the axis-label rotation has to come afterwards.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
# Scatter plot of Profit against Discount, one semi-transparent red point per row.
dat %>%
  ggplot(mapping = aes(x = Discount, y = Profit)) +
  geom_point(alpha = 0.5, color = "red") +
  labs(title = "Discount vs Profit", x = "Discount", y = "Profit") +
  theme_minimal()
# Total sales for every Category x Region combination.
# .groups = "drop" returns an ungrouped tibble; without it the result stays
# grouped by Category and summarise() prints a message about the grouping.
heatmap_data <- dat %>%
  group_by(Category, Region) %>%
  summarise(Total_Sales = sum(Sales), .groups = "drop")

# One tile per Category/Region pair, shaded by total sales.
ggplot(heatmap_data,
       aes(x = Region,
           y = Category,
           fill = Total_Sales)) +
  geom_tile() +
  labs(
    title = "Heatmap of Sales by Category and Region",
    x = "Region",
    y = "Category"
  ) +
  theme_minimal()
library(tidyverse)
library(plotly)
library(lubridate)
# Re-read the raw CSV so the interactive section starts from the original headers.
dat <- read_csv(file = "Sample_Superstore.csv")
## Rows: 9994 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Segment, ...
## dbl (5): Row ID, Sales, Quantity, Discount, Profit
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the headers to syntactic R names (e.g. "Order Date" -> "Order.Date").
names(dat) <- make.names(names(dat))

# Parse the order date strings as month-day-year dates.
dat[["Order.Date"]] <- mdy(dat[["Order.Date"]])

# Preview the cleaned data.
head(dat)
## # A tibble: 6 × 19
## Row.ID Order.ID Order.Date Ship.Date Ship.Mode Customer.ID Segment Country
## <dbl> <chr> <date> <chr> <chr> <chr> <chr> <chr>
## 1 1 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520 Consum… United…
## 2 2 CA-2016-152… 2016-11-08 11-11-20… Second C… CG-12520 Consum… United…
## 3 3 CA-2016-138… 2016-06-12 6/16/2016 Second C… DV-13045 Corpor… United…
## 4 4 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335 Consum… United…
## 5 5 US-2015-108… 2015-10-11 10/18/20… Standard… SO-20335 Consum… United…
## 6 6 CA-2014-115… 2014-06-09 6/14/2014 Standard… BH-11710 Consum… United…
## # ℹ 11 more variables: City <chr>, State <chr>, Region <chr>, Product.ID <chr>,
## # Category <chr>, Sub.Category <chr>, Product.Name <chr>, Sales <dbl>,
## # Quantity <dbl>, Discount <dbl>, Profit <dbl>
# The column names were already made syntactic right after loading, so the
# repeated make.names() call that used to sit here was a no-op and is dropped.

# Static scatter of Profit against Sales, coloured by product Category.
interactive_plot <- ggplot(dat,
                           aes(x = Sales,
                               y = Profit,
                               color = Category)) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Interactive Sales vs Profit Visualization",
    x = "Sales",
    y = "Profit"
  ) +
  theme_minimal()

# Convert the ggplot object into an interactive plotly widget.
ggplotly(interactive_plot)
This project analyzed retail sales data using multiple visualization techniques. The findings show how sales and profits vary across regions, categories, discounts, and shipping methods. Interactive and graphical analysis provides useful business insights that can support strategic decision-making and performance improvement.