library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(tidyr)
library(scales )
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

Our data has been cleaned from the previous project.

TASK1: CONVERT CATEGORICAL VARIABLES TO FACTORS

sales_details<-read.csv("/Users/mac/Downloads/Nike sales cleaned data.csv")
glimpse(sales_details)
## Rows: 2,500
## Columns: 12
## $ Order_ID         <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,…
## $ Gender_Category  <chr> "Kids", "Women", "Women", "Kids", "Kids", "Women", "M…
## $ Product_Line     <chr> "Training", "Soccer", "Soccer", "Lifestyle", "Running…
## $ Product_Name     <chr> "SuperRep Go", "Tiempo Legend", "Premier III", "Blaze…
## $ Size             <chr> "M", "M", "M", "L", "XL", "M", "M", "M", "XI", "M", "…
## $ Units_Sold       <int> 2, 3, 4, 2, 2, 1, 2, 1, 1, 4, 1, 1, 1, 2, 2, 2, 1, 4,…
## $ MRP              <int> 6040, 4958, 6040, 9674, 6040, 7364, 6820, 6040, 6040,…
## $ Discount_Applied <dbl> 0.470, 0.615, 0.615, 0.615, 0.615, 0.615, 0.615, 0.32…
## $ Revenue          <int> 6402, 5726, 9302, 7449, 4651, 2835, 5251, 4107, 2325,…
## $ Order_Date       <chr> "2024-09-03", "2024-09-07", "2024-10-11", "2024-10-04…
## $ Sales_Channel    <chr> "Online", "Retail", "Retail", "Online", "Retail", "Re…
## $ Region           <chr> "Bengaluru", "Hyd", "Mumbai", "Pune", "Delhi", "Delhi…
colSums(is.na(sales_details))
##         Order_ID  Gender_Category     Product_Line     Product_Name 
##                0                0                0                0 
##             Size       Units_Sold              MRP Discount_Applied 
##              510                0                0                0 
##          Revenue       Order_Date    Sales_Channel           Region 
##                0                0                0                0
# convert categorical variables to factors

sales_details$Gender_Category <- as.factor(sales_details$Gender_Category)
sales_details$Product_Line <- as.factor(sales_details$Product_Line)
sales_details$Product_Name <- as.factor(sales_details$Product_Name)
sales_details$Size <- as.factor(sales_details$Size)
sales_details$Sales_Channel <- as.factor(sales_details$Sales_Channel)
sales_details$Region <- as.factor(sales_details$Region)
sales_details$Product_Line<-as.factor(sales_details$Product_Line)

EXPLORATORY DATA ANALYSIS

TASK2: Let’s find the summary statistics of Key numerical columns

sales_details %>% 
  summarise(
    avg_units_sold = mean(Units_Sold, na.rm=TRUE),
    avg_mrp = mean(MRP, na.rm=TRUE),
    avg_discount = mean(Discount_Applied, na.rm=TRUE),
    avg_revenue = mean(Revenue, na.rm=TRUE),
    total_revenue = sum(Revenue, na.rm=TRUE),
    total_units_sold = sum(Units_Sold, na.rm=TRUE)
  )
##   avg_units_sold  avg_mrp avg_discount avg_revenue total_revenue
## 1         1.9916 6039.936       0.5425    5504.196      13760489
##   total_units_sold
## 1             4979

ANALYZING THE HIGHEST ORDER IN VARIOUS SECTIONS USING HISTOGRAM

Orders by Units_Sold

ggplot(sales_details, aes(Units_Sold)) +
  geom_histogram(bins = 30, fill="lightgreen", color="white") +
  theme_minimal()

From the plot, most common order size is 2 units.

Orders by Gender Category

ggplot(sales_details, aes(Gender_Category)) +
  geom_bar(fill="lightgreen") +
  theme_minimal() +
  labs(title="Orders by Gender Category")

From above, kids account for the highest number of orders.

Orders by Region

c<-ggplot(sales_details, aes(Region)) +
  geom_bar(fill="lightgreen") +
  theme_minimal() +
  labs(title="Orders by Region")
ggsave("Delhi.png",plot=c)
## Saving 7 x 5 in image

From above, most orders originated from Delhi than any other region. This indicates that customers from Delhi are the most active contributing the largest share of total orders.

Orders by Channel

ggplot(sales_details, aes(Sales_Channel)) +
  geom_bar(fill="lightgreen") +
  theme_minimal() +
  labs(title="Orders by Sales Channel")

From above, the online channel has the highest number of orders.This indicates that online channel is the most active channel in terms of orders.

TASK 3: TOP PRODUCT LINES

Let’s group Product Name by Product line

library(dplyr)

# Summarize units sold per Product Name within each Product Line
product_summary <- sales_details %>%
  group_by(Product_Line, Product_Name) %>%
  summarise(total_units = sum(Units_Sold, na.rm = TRUE),
            total_revenue = sum(Revenue, na.rm = TRUE)) %>%
  arrange(Product_Line, desc(total_units))  # highest units first per line
## `summarise()` has grouped output by 'Product_Line'. You can override using the
## `.groups` argument.
product_summary
## # A tibble: 20 × 4
## # Groups:   Product_Line [5]
##    Product_Line Product_Name       total_units total_revenue
##    <fct>        <fct>                    <int>         <int>
##  1 Basketball   LeBron 20                  278        811545
##  2 Basketball   Air Jordan                 265        767045
##  3 Basketball   Zoom Freak                 246        703635
##  4 Basketball   Kyrie Flytrap              217        540874
##  5 Lifestyle    Waffle One                 279        742751
##  6 Lifestyle    Blazer Mid                 267        732817
##  7 Lifestyle    Dunk Low                   243        705320
##  8 Lifestyle    Air Force 1                239        708072
##  9 Running      React Infinity             275        679475
## 10 Running      Free RN                    240        661652
## 11 Running      Air Zoom                   206        563047
## 12 Running      Pegasus Turbo              198        572598
## 13 Soccer       Phantom GT                 255        707336
## 14 Soccer       Premier III                242        605251
## 15 Soccer       Tiempo Legend              224        634785
## 16 Soccer       Mercurial Superfly         217        604607
## 17 Training     SuperRep Go                308        835423
## 18 Training     Flex Trainer               282        782761
## 19 Training     ZoomX Invincible           274        779115
## 20 Training     Metcon 7                   224        622380

TOTAL UNITS SOLD BY PRODUCT LINE

productline_units <- sales_details %>%
  group_by(Product_Line) %>%
  summarise(total_units = sum(Units_Sold, na.rm = TRUE)) %>%
  arrange(desc(total_units))

ggplot(productline_units, aes(x = reorder(Product_Line, total_units), y = total_units)) +
  geom_col(fill = "lightgreen") +
  geom_text(aes(label = total_units), hjust = -0.05) +
  coord_flip() +
  theme_minimal() +
  labs(title = "Total Units Sold by Product Line", x = "Product Line", y ="Total Units Sold") +
  scale_y_continuous(expand = expansion(mult = c(0,0.25)))

From above, Training Product line is the highest units sold in Nike Sales.

TOTAL REVENUE BY PRODUCT LINE

productline_revenue <- sales_details %>%
  group_by(Product_Line) %>%
  summarise(total_revenue = sum(Revenue, na.rm = TRUE)) %>%
  arrange(desc(total_revenue))

ggplot(productline_revenue, aes(x = reorder(Product_Line, total_revenue), y = total_revenue)) +
  geom_col(fill = "lightgreen") +
  geom_text(aes(label = paste0("#", format(total_revenue, big.mark=","))), hjust = -0.05) +
  coord_flip() +
  theme_minimal() +
  labs(title = "Total Revenue by Product Line", x = "Product Line", y ="Total Revenue") +
  scale_y_continuous(expand = expansion(mult = c(0,0.25)))

From above, Training product line generates the highest revenue for Nike Sales store.

UNITS SOLD BY PRODUCT LINE PER GENDER

ggplot(sales_details, aes(x = reorder(Product_Line, Units_Sold), y = Units_Sold, fill = Gender_Category)) +
  geom_col(show.legend = FALSE) +
  #geom_text(aes(label = Units_Sold), hjust = -0.05, size = 3) +
  coord_flip() +
  facet_wrap(~ Gender_Category, scales = "free_y") +
  theme_minimal() +
  labs(title = "Units Sold by Product Line per Gender", x ="Product Line" , y ="Gender" ) +
  scale_y_continuous(expand = expansion(mult = c(0,0.25)))

Lifestyle and Training are the common product line amidst Kids Training is the most common product line amidst Men Lifestyle and Training are the most common product line amidst women.

UNITS SOLD BY PRODUCT LINE PER REGION

ggplot(sales_details, aes(x = reorder(Product_Line, Units_Sold), y = Units_Sold, fill =Region)) +
  geom_col(show.legend = FALSE) +
  #geom_text(aes(label = Units_Sold), hjust = 0.2, size = 2) +
  coord_flip() +
  facet_wrap(~Region, scales = "free_y") +
  theme_minimal() +
  labs(title = "Units Sold by Product Line per Region", x = "Product Line", y ="Region") +
  scale_y_continuous(expand = expansion(mult = c(0,0.45)))

Training Product Line has the highest unit sold in Bangalore, Bengaluru, Delhi,Mumbai and Pune Lifestyle Product Line has the highest unit sold in Hyd and Hyderabad Basketball Product Line has the highest unit sold in Hyderbad and Kolkata

UNITS SOLD BY PRODUCT LINE PER SALES CHANNEL

ggplot(sales_details, aes(x = reorder(Product_Line, Units_Sold), y = Units_Sold, fill =Sales_Channel)) +
  geom_col(show.legend = FALSE) +
  #geom_text(aes(label = Units_Sold), hjust = -0.05, size = 4) +
  coord_flip() +
  facet_wrap(~Sales_Channel, scales = "free_y") +
  theme_minimal() +
  labs(title = "Units Sold by Product Line per Sales Channel", x = "Product Line", y = "Sales Channel") +
  scale_y_continuous(expand = expansion(mult = c(0,0.45)))

Training has the highest order online Basketball has the highest order in Retail shops.

Insights: Training and Lifestyle Product Line is the most popular Product Line

TASK4: TOP PRODUCTS NAMES

GET THE TOP TEN PRODUCTS BY REVENUE

# Summarize top 10 products by revenue
top_products <- sales_details %>% 
  group_by(Product_Name) %>% 
  summarise(total_revenue = sum(Revenue, na.rm = TRUE)) %>% 
  arrange(desc(total_revenue)) %>% 
  slice_head(n = 10)
top_products
## # A tibble: 10 × 2
##    Product_Name     total_revenue
##    <fct>                    <int>
##  1 SuperRep Go             835423
##  2 LeBron 20               811545
##  3 Flex Trainer            782761
##  4 ZoomX Invincible        779115
##  5 Air Jordan              767045
##  6 Waffle One              742751
##  7 Blazer Mid              732817
##  8 Air Force 1             708072
##  9 Phantom GT              707336
## 10 Dunk Low                705320
# Horizontal bar chart without numeric x-axis
ggplot(top_products, aes(x = reorder(Product_Name, total_revenue), y = total_revenue)) +
  geom_col(fill = "lightgreen") +
  geom_text(aes(label = paste0("#", format(total_revenue, big.mark = ","))),
            hjust = -0.05, size = 4) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 10 Products by Revenue",
    x = "Product Name",               # omit x-axis label
    y = "Total Revenue"      # y-axis label
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.25))) 

From above, SuperRep Go generated the highest revenue among all products. This indicates that it is the best performing product, contributing the largest share to total sales.

Recommendation:

Nike Company should consider promoting this product(SuperRep Go) futher.

TOP 10 products by Unit Sold

# Top 10 products by units sold
top_products_units <- sales_details %>%
  group_by(Product_Name) %>%
  summarise(total_units = sum(Units_Sold, na.rm = TRUE)) %>%
  arrange(desc(total_units)) %>%
  slice_head(n = 10)

# Plot
ggplot(top_products_units, aes(x = reorder(Product_Name, total_units), y = total_units)) +
  geom_col(fill = "lightgreen") +
  geom_text(aes(label = total_units), hjust = -0.05, size = 4) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 10 Products by Units Sold",
    x = "Product Name",
    y = "Total Units Sold"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.25)))

From above, SuperRep Go Product was purchased in the largest Quantity.

TOP 3 PRODUCTS BY UNITS SOLD PER GENDER CATEGORY

top_products_gender <- sales_details %>%
  group_by(Gender_Category, Product_Name) %>%
  summarise(total_units = sum(Units_Sold, na.rm = TRUE)) %>%
  arrange(Gender_Category, desc(total_units)) %>%
  slice_head(n = 3)
## `summarise()` has grouped output by 'Gender_Category'. You can override using
## the `.groups` argument.
# Plot with facets by gender
ggplot(top_products_gender, aes(x = reorder(Product_Name, total_units), y = total_units, fill = Gender_Category)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = total_units), hjust = -0.05, size = 4) +
  coord_flip() +
  facet_wrap(~ Gender_Category, scales = "free_y") +
  theme_minimal() +
  labs(
    title = "Top 3 Products by Units Sold per Gender",
    x ="Product Name",
    y = "Total Units Sold"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.45)))

From above, Blazer Mid is most popular amidst Kids

Flex Trainer is most popular amidst Men

Zoomx Invincible is most popular amidst Women

Recommendation

From the order by gender analysis, Kids have the highest number of orders, I will advise Nike Sales to get more Blazer Mid in their storeto increase orders in the store.

TOP 3 PRODUCTS BY UNITS SOLD PER REGION

top_products_region <- sales_details %>%
  group_by(Region, Product_Name) %>%
  summarise(total_units = sum(Units_Sold, na.rm = TRUE)) %>%
  arrange(Region, desc(total_units)) %>%
  slice_head(n = 3)
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
# Plot with facets by region
ggplot(top_products_region, aes(x = reorder(Product_Name, total_units), y = total_units, fill = Region)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = total_units), hjust = -0.05, size = 4) +
  coord_flip() +
  facet_wrap(~ Region, scales = "free_y") +
  theme_minimal() +
  labs(
    title = "Top 3 Products by Units Sold per Region",
    x = "Product Name",
    y = "Total Units Sold"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.45)))

SuperRep Go is popular in Delhi,Mumbai and Pune

Metcon 7 is popular in Bangalore and Bengaluru

Waffle One is popular in Hyd.

Dunk Low is popular in Hyderabad

Air Jordan is popular in Hyderbad

Lebron 20 is popular in Kolkata

TOP 3 PRODUCTS BY UNITS SOLD PER SALES CHANNEL

top_products_channel <- sales_details %>%
  group_by(Sales_Channel, Product_Name) %>%
  summarise(total_units = sum(Units_Sold, na.rm = TRUE)) %>%
  arrange(Sales_Channel, desc(total_units)) %>%
  slice_head(n = 3)
## `summarise()` has grouped output by 'Sales_Channel'. You can override using the
## `.groups` argument.
# Plot with facets by sales channel
ggplot(top_products_channel, aes(x = reorder(Product_Name, total_units), y = total_units, fill = Sales_Channel)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = total_units), hjust = -0.05, size = 4) +
  coord_flip() +
  facet_wrap(~ Sales_Channel, scales = "free_y") +
  theme_minimal() +
  labs(
    title = "Top 3 Products by Units Sold per Sales Channel",
    x = "Product Name",
    y = "Total Units Sold"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.45)))

ZoomX invincible has the highest order online while SuperRep Go has the highest order in retail stores.

Recommendation: Since Kids order more, Blazer Mild should be stocked in stores and made available online

SuperRep Go is very popular across all regions, channels, and genders. Nike should feature it as a flagship product in stores and promotions.

Flex Trainer should be stocked as it is popular amidst Men ZoomX Invincible should also be stocked as it is popular amidst Women.

Most popular product falls under Training and Lifestyle

Products Recommended for Nike’s 2026 Inventory Strategy

Training: SuperRep Go, Metcon 7, ZoomX Invincible Lifestyle: Blazer Mid and Waffle One Basketball: Air Jordan and Lebron 20

Also,since online channel has the highest number of orders, Nike should employ more people in the retail stores to enhance more sales.

SuperRep Go is highly recommended as it generates the highest revenue and is also most common in Delhi region where most of the orders generated from.

Kids has the highest order; stock the stores more with Blazer Mid