# import all libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#1. Import the "vestiaire.csv" file into a Jupyter Notebook and load it into a DataFrame named df.
# read netflix csv file
df=pd.read_csv("vestiaire.csv")
df
| product_id | product_type | product_name | product_description | product_keywords | product_gender_target | product_category | product_season | product_condition | product_like_count | … | warehouse_name | seller_id | seller_username | usually_ships_within | seller_country | seller_products_sold | seller_num_products_listed | seller_community_rank | seller_num_followers | seller_pass_rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 43247626 | Wool mini skirt | Wool mini skirt Miu Miu Grey size S Internatio… | Miu Miu – Pleated mini skirt Size: 36 (S) Wai… | Miu Miu Wool Skirts | Women | Women Clothing | Autumn / Winter | Never worn | 34.0 | … | Tourcoing | 25775970 | vitalii25775970 | NaN | Germany | 3.0 | 14.0 | 0.0 | 13.0 | 0.0 |
| 1 | 43247441 | Jacket | Jacket Barbara Bui Navy size 42 FR in Cotton | For selling nice women’s suit Barbara Bui size… | Barbara Bui Cotton Jackets | Women | Women Clothing | All seasons | Very good condition | 1.0 | … | Tourcoing | 13698770 | olivia13698770 | NaN | Belgium | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 |
| 2 | 43246517 | Wool coat | Wool coat Comme Des Garcons White size S Inter… | Magnificent boiled wool coat. I bought it in t… | Comme Des Garcons Wool Coats | Women | Women Clothing | Autumn / Winter | Very good condition | 2.0 | … | Tourcoing | 6042365 | cecilia6042365 | 1-2 days | Spain | 58.0 | 69.0 | 0.0 | 62.0 | 96.0 |
| 3 | 43246507 | Mini skirt | Mini skirt MSGM Black size 38 IT in Polyester | MSGM Skirt Black Printed Raw-Edge & Embroidere… | MSGM Polyester Skirts | Women | Women Clothing | All seasons | Very good condition | 0.0 | … | Brooklyn | 13172949 | gretchen13172949 | 1-2 days | United States | 63.0 | 274.0 | 126346.0 | 131.0 | 96.0 |
| 4 | 43246417 | Vegan leather trousers | Vegan leather trousers LVIR Black size 36 FR i… | LVIR black grained faux leather trousers size … | LVIR Vegan leather Trousers | Women | Women Clothing | All seasons | Very good condition | 1.0 | … | Crawley | 2578605 | crunchykat | 3-5 days | United Kingdom | 19.0 | 14.0 | 102821.0 | 40.0 | 89.0 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| 900509 | 41538360 | Glitter trainers | Glitter trainers Bally Gold size 38 EU in Glit… | Bally Vita Parcours sneakers. PPleather, suede… | Bally Glitter Trainers | Women | Women Shoes | All seasons | Good condition | 1.0 | … | Tourcoing | 8849230 | lauragua | 1-2 days | Italy | 40.0 | 17.0 | 391778.0 | 104.0 | 100.0 |
| 900510 | 41532271 | Leather heels | Leather heels Casadei Black size 38.5 EU in Le… | Trendy and classic Casadei high-heel pumps Mad… | Casadei Leather Heels | Women | Women Shoes | All seasons | Good condition | 1.0 | … | Tourcoing | 5448248 | bettina5448248 | 1-2 days | Austria | 26.0 | 99.0 | 51408.0 | 75.0 | 89.0 |
| 900511 | 41538140 | Leather cowboy boots | Leather cowboy boots Ash Black size 36 EU in L… | Very good quality leather boots Worn once Elas… | Ash Leather Boots | Women | Women Shoes | All seasons | Very good condition | 0.0 | … | Tourcoing | 9347694 | sylvie9347694 | NaN | France | 0.0 | 2.0 | 0.0 | 3.0 | 0.0 |
| 900512 | 41537603 | Leather ballet flats | Leather ballet flats Lauren Ralph Lauren Black… | Very beautiful ballet flats like new. I don’t … | Lauren Ralph Lauren Leather Ballet flats | Women | Women Shoes | All seasons | Very good condition | 27.0 | … | Tourcoing | 24074881 | marina24074881 | 1-2 days | Italy | 2.0 | 7.0 | 0.0 | 11.0 | 100.0 |
| 900513 | 41534820 | J’adior cloth sandal | J’adior cloth sandal Dior Beige size 38 EU in … | Dior J’adior slingback sandals pumps New, nev… | Dior J’adior Sandals | Women | Women Shoes | Spring / Summer | Never worn | 47.0 | … | Tourcoing | 24557483 | shop24557483 | 1-2 days | Romania | 66.0 | 354.0 | 0.0 | 195.0 | 75.0 |
900514 rows × 36 columns
#2. Display a basic summary of the DataFrame information:
#Available columns and dimensions
#Data types of each column
#Missing values
# df dimenssions
df.shape
(900514, 36)
# df information
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900514 entries, 0 to 900513
Data columns (total 36 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 product_id 900514 non-null int64
1 product_type 900514 non-null object
2 product_name 900514 non-null object
3 product_description 900507 non-null object
4 product_keywords 899331 non-null object
5 product_gender_target 900514 non-null object
6 product_category 899331 non-null object
7 product_season 900512 non-null object
8 product_condition 900514 non-null object
9 product_like_count 900514 non-null float64
10 sold 900514 non-null bool
11 reserved 900514 non-null bool
12 available 900514 non-null bool
13 in_stock 900514 non-null bool
14 should_be_gone 900514 non-null bool
15 brand_id 900514 non-null int64
16 brand_name 900514 non-null object
17 brand_url 900514 non-null object
18 product_material 900510 non-null object
19 product_color 900513 non-null object
20 price_usd 900514 non-null float64
21 seller_price 900514 non-null float64
22 seller_earning 900514 non-null float64
23 seller_badge 900514 non-null object
24 has_cross_border_fees 886778 non-null object
25 buyers_fees 886778 non-null float64
26 warehouse_name 900514 non-null object
27 seller_id 900514 non-null int64
28 seller_username 900475 non-null object
29 usually_ships_within 745723 non-null object
30 seller_country 900514 non-null object
31 seller_products_sold 900514 non-null float64
32 seller_num_products_listed 900514 non-null float64
33 seller_community_rank 900514 non-null float64
34 seller_num_followers 900514 non-null float64
35 seller_pass_rate 900514 non-null float64
dtypes: bool(5), float64(10), int64(3), object(18)
memory usage: 217.3+ MB
# BEFORE Data cleaning process
#the Liste of missing value(NaN)
df.isna().sum()
product_id 0
product_type 0
product_name 0
product_description 7
product_keywords 1183
product_gender_target 0
product_category 1183
product_season 2
product_condition 0
product_like_count 0
sold 0
reserved 0
available 0
in_stock 0
should_be_gone 0
brand_id 0
brand_name 0
brand_url 0
product_material 4
product_color 1
price_usd 0
seller_price 0
seller_earning 0
seller_badge 0
has_cross_border_fees 13736
buyers_fees 13736
warehouse_name 0
seller_id 0
seller_username 39
usually_ships_within 154791
seller_country 0
seller_products_sold 0
seller_num_products_listed 0
seller_community_rank 0
seller_num_followers 0
seller_pass_rate 0
dtype: int64
#1. For quantitative variables, replace missing values with MEAN.
# Quantitative variables:
# select the numerical columns
num_cols=df. select_dtypes(include ="number").columns
num_cols
Index(['product_id', 'product_like_count', 'brand_id', 'price_usd',
'seller_price', 'seller_earning', 'buyers_fees', 'seller_id',
'seller_products_sold', 'seller_num_products_listed',
'seller_community_rank', 'seller_num_followers', 'seller_pass_rate'],
dtype='object')
# replace the missing volues by the mean
for col in num_cols:
df[col]=df[col].fillna(df[col].mean())
df.isna().sum()
product_id 0
product_type 0
product_name 0
product_description 7
product_keywords 1183
product_gender_target 0
product_category 1183
product_season 2
product_condition 0
product_like_count 0
sold 0
reserved 0
available 0
in_stock 0
should_be_gone 0
brand_id 0
brand_name 0
brand_url 0
product_material 4
product_color 1
price_usd 0
seller_price 0
seller_earning 0
seller_badge 0
has_cross_border_fees 13736
buyers_fees 0
warehouse_name 0
seller_id 0
seller_username 39
usually_ships_within 154791
seller_country 0
seller_products_sold 0
seller_num_products_listed 0
seller_community_rank 0
seller_num_followers 0
seller_pass_rate 0
dtype: int64
#2. For categorical variables, replace missing values with the MODE.
# Categorical variables: Replace by the Mode
# select the numerical columns
cat_cols=df. select_dtypes(include ="object").columns
# replace the missing volues by the mean
for col in cat_cols:
df[col]=df[col].fillna(df[col].mode()[0])
df.isna().sum()
product_id 0
product_type 0
product_name 0
product_description 0
product_keywords 0
product_gender_target 0
product_category 0
product_season 0
product_condition 0
product_like_count 0
sold 0
reserved 0
available 0
in_stock 0
should_be_gone 0
brand_id 0
brand_name 0
brand_url 0
product_material 0
product_color 0
price_usd 0
seller_price 0
seller_earning 0
seller_badge 0
has_cross_border_fees 0
buyers_fees 0
warehouse_name 0
seller_id 0
seller_username 0
usually_ships_within 0
seller_country 0
seller_products_sold 0
seller_num_products_listed 0
seller_community_rank 0
seller_num_followers 0
seller_pass_rate 0
dtype: int64
#1. Displayed the first and last six rows of the DataFrame.
df.head(6)
| product_id | product_type | product_name | product_description | product_keywords | product_gender_target | product_category | product_season | product_condition | product_like_count | … | warehouse_name | seller_id | seller_username | usually_ships_within | seller_country | seller_products_sold | seller_num_products_listed | seller_community_rank | seller_num_followers | seller_pass_rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 43247626 | Wool mini skirt | Wool mini skirt Miu Miu Grey size S Internatio… | Miu Miu – Pleated mini skirt Size: 36 (S) Wai… | Miu Miu Wool Skirts | Women | Women Clothing | Autumn / Winter | Never worn | 34.0 | … | Tourcoing | 25775970 | vitalii25775970 | 1-2 days | Germany | 3.0 | 14.0 | 0.0 | 13.0 | 0.0 |
| 1 | 43247441 | Jacket | Jacket Barbara Bui Navy size 42 FR in Cotton | For selling nice women’s suit Barbara Bui size… | Barbara Bui Cotton Jackets | Women | Women Clothing | All seasons | Very good condition | 1.0 | … | Tourcoing | 13698770 | olivia13698770 | 1-2 days | Belgium | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 |
| 2 | 43246517 | Wool coat | Wool coat Comme Des Garcons White size S Inter… | Magnificent boiled wool coat. I bought it in t… | Comme Des Garcons Wool Coats | Women | Women Clothing | Autumn / Winter | Very good condition | 2.0 | … | Tourcoing | 6042365 | cecilia6042365 | 1-2 days | Spain | 58.0 | 69.0 | 0.0 | 62.0 | 96.0 |
| 3 | 43246507 | Mini skirt | Mini skirt MSGM Black size 38 IT in Polyester | MSGM Skirt Black Printed Raw-Edge & Embroidere… | MSGM Polyester Skirts | Women | Women Clothing | All seasons | Very good condition | 0.0 | … | Brooklyn | 13172949 | gretchen13172949 | 1-2 days | United States | 63.0 | 274.0 | 126346.0 | 131.0 | 96.0 |
| 4 | 43246417 | Vegan leather trousers | Vegan leather trousers LVIR Black size 36 FR i… | LVIR black grained faux leather trousers size … | LVIR Vegan leather Trousers | Women | Women Clothing | All seasons | Very good condition | 1.0 | … | Crawley | 2578605 | crunchykat | 3-5 days | United Kingdom | 19.0 | 14.0 | 102821.0 | 40.0 | 89.0 |
| 5 | 43246347 | Mid-length dress | Mid | See By Chloe white dress, size 42 IT (size lik… | See by Chloé Cotton - elasthane Dresses | Women | Women Clothing | All seasons | Very good condition | 0.0 | … | Tourcoing | 3062607 | julie3062607 | 1-2 days | France | 3.0 | 22.0 | 493200.0 | 21.0 | 100.0 |
6 rows × 36 columns
df.tail(6)
| product_id | product_type | product_name | product_description | product_keywords | product_gender_target | product_category | product_season | product_condition | product_like_count | … | warehouse_name | seller_id | seller_username | usually_ships_within | seller_country | seller_products_sold | seller_num_products_listed | seller_community_rank | seller_num_followers | seller_pass_rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 900508 | 41530377 | Cloth heels | Cloth heels Louis Vuitton Gold size 40 IT in C… | Beautiful and classic women’s Louis Vuitton Go… | Louis Vuitton Cloth Heels | Women | Women Shoes | All seasons | Never worn | 7.0 | … | Crawley | 3194111 | jade1985 | 1-2 days | United Kingdom | 22.0 | 15.0 | 301568.0 | 28.0 | 90.0 |
| 900509 | 41538360 | Glitter trainers | Glitter trainers Bally Gold size 38 EU in Glit… | Bally Vita Parcours sneakers. PPleather, suede… | Bally Glitter Trainers | Women | Women Shoes | All seasons | Good condition | 1.0 | … | Tourcoing | 8849230 | lauragua | 1-2 days | Italy | 40.0 | 17.0 | 391778.0 | 104.0 | 100.0 |
| 900510 | 41532271 | Leather heels | Leather heels Casadei Black size 38.5 EU in Le… | Trendy and classic Casadei high-heel pumps Mad… | Casadei Leather Heels | Women | Women Shoes | All seasons | Good condition | 1.0 | … | Tourcoing | 5448248 | bettina5448248 | 1-2 days | Austria | 26.0 | 99.0 | 51408.0 | 75.0 | 89.0 |
| 900511 | 41538140 | Leather cowboy boots | Leather cowboy boots Ash Black size 36 EU in L… | Very good quality leather boots Worn once Elas… | Ash Leather Boots | Women | Women Shoes | All seasons | Very good condition | 0.0 | … | Tourcoing | 9347694 | sylvie9347694 | 1-2 days | France | 0.0 | 2.0 | 0.0 | 3.0 | 0.0 |
| 900512 | 41537603 | Leather ballet flats | Leather ballet flats Lauren Ralph Lauren Black… | Very beautiful ballet flats like new. I don’t … | Lauren Ralph Lauren Leather Ballet flats | Women | Women Shoes | All seasons | Very good condition | 27.0 | … | Tourcoing | 24074881 | marina24074881 | 1-2 days | Italy | 2.0 | 7.0 | 0.0 | 11.0 | 100.0 |
| 900513 | 41534820 | J’adior cloth sandal | J’adior cloth sandal Dior Beige size 38 EU in … | Dior J’adior slingback sandals pumps New, nev… | Dior J’adior Sandals | Women | Women Shoes | Spring / Summer | Never worn | 47.0 | … | Tourcoing | 24557483 | shop24557483 | 1-2 days | Romania | 66.0 | 354.0 | 0.0 | 195.0 | 75.0 |
6 rows × 36 columns
#2. Identified the best-selling product_type and brand on sold data = "transaction count" / (Top 5 for TYPE OF PRODUCT & BRAND)
best_selling_product_type = df["product_type"].value_counts().reset_index()
best_selling_product_type.columns = ['product_type', 'transaction_count']
best_selling_brand_name = df['brand_name'].value_counts().reset_index()
best_selling_brand_name.columns = ['brand_name', 'transaction_count']
best_selling_product_type
| product_type | transaction_count | |
|---|---|---|
| 0 | Sunglasses | 42861 |
| 1 | Jacket | 29499 |
| 2 | Silk tie | 26103 |
| 3 | T-shirt | 24929 |
| 4 | Shirt | 21958 |
| … | … | … |
| 10979 | Roller Boat trainers | 1 |
| 10980 | Zoom Fly glitter low trainers | 1 |
| 10981 | Air Jordan 14 high trainers | 1 |
| 10982 | VLTN top | 1 |
| 10983 | Kitty cloth heels | 1 |
10984 rows × 2 columns
best_selling_brand_name
| brand_name | transaction_count | |
|---|---|---|
| 0 | Gucci | 41009 |
| 1 | Burberry | 24018 |
| 2 | Dolce & Gabbana | 22024 |
| 3 | Prada | 20972 |
| 4 | Hermès | 18711 |
| … | … | … |
| 8879 | Jose Levy | 1 |
| 8880 | Kolon Sport | 1 |
| 8881 | Medicom Toy | 1 |
| 8882 | CIRCOLO 1905 | 1 |
| 8883 | Aera | 1 |
8884 rows × 2 columns
#3. Identified seasonal trends by evaluating product sales across different seasons using product_season.
best_selling_product_season = df["product_season"].value_counts().reset_index()
best_selling_product_season.columns = ['product_season', 'transaction_count']
best_selling_product_season
| product_season | transaction_count | |
|---|---|---|
| 0 | All seasons | 748110 |
| 1 | Autumn / Winter | 114216 |
| 2 | Spring / Summer | 38188 |
#4. Compared product_like_count and sold data to identify products that are popular but have low conversion rates (In product category).
#Calculated the AVERAGE NUMBER OF LIKES and CONVERSION RATE for each PRODUCT CATEGORY
product_stats = df.groupby('product_category',).agg(
avg_like_count=('product_like_count', 'mean'),
sold_count=('sold', lambda x: (x == 'True').sum()),
total_count=('sold', 'count')
).reset_index()
# Calculated RATE OF SOLD PRODUCTS - "sold rate"
product_stats['sold_rate'] = product_stats['sold_count'] / product_stats['total_count']
# Calculated the AVERGAE LIKES / TRANSACTION RATE - "overall average number of likes" and "overall transaction rate"
overall_avg_likes = df['product_like_count'].mean()
overall_sold_rate = (df['sold'] == 'True').sum() / len(df)
# Found out the PRODUCT CATEGORIES WITH HIGH LIKES BUT LOW SALES - product categories with high likes but low transaction rate
low_conversion_products = product_stats[
(product_stats['avg_like_count'] > overall_avg_likes) &
(product_stats['sold_rate'] < overall_sold_rate)
]
print("Sales and likes analysis for all product category:")
print(product_stats)
print("Product categories with high likes but low transaction rates:")
print(low_conversion_products)
Sales and likes analysis for all product category:
product_category avg_like_count sold_count total_count sold_rate
0 Men Accessories 6.477393 919 98776 0.009304
1 Men Clothing 4.391110 2744 194265 0.014125
2 Men Shoes 5.979518 1620 125425 0.012916
3 Women Accessories 11.504123 601 146018 0.004116
4 Women Clothing 4.278131 3910 192197 0.020344
5 Women Shoes 6.443911 3942 143833 0.027407
Product categories with high likes but low transaction rates:
product_category avg_like_count sold_count total_count sold_rate
0 Men Accessories 6.477393 919 98776 0.009304
3 Women Accessories 11.504123 601 146018 0.004116
#5. Calculated the PRODUCT TRANSACTION RATE IN EACH SEASON - (All, Spring/Summer, Autumn/Winter)
# Calculated the transaction volume of products in each season
season_sales = df.groupby('product_season')['sold'].agg(
sold_count=lambda x: (x == 'True').sum(),
unsold_count=lambda x: (x == 'False').sum()
).reset_index()
# Calculated the transaction rate
season_sales['sold_rate'] = season_sales['sold_count'] / (season_sales['sold_count'] + season_sales['unsold_count'])
#Adjusted the order
season_sales = season_sales.sort_values(by='sold_count', ascending=True)
print(season_sales)
product_season sold_count unsold_count sold_rate
2 Spring / Summer 1024 37164 0.026815
1 Autumn / Winter 1222 112994 0.010699
0 All seasons 11490 736620 0.015359
#6. Identifyied POPULAR PRODUCT CATEGORIES FOR MEN AND WOMEN using product_gender_target.
# Calculated the number of sales for each product category by gender
gender_category_sales = df.groupby(['product_category', 'product_gender_target']).agg(
sold_count=('sold', lambda x: (x == 'True').sum()), # Count sold items (T)
total_count=('sold', 'count') # Total products listed
).reset_index()
# Calculated sales rate for each category-gender combination
gender_category_sales['sold_rate'] = gender_category_sales['sold_count'] / gender_category_sales['total_count']
# Identified the most popular categories for each gender
top_categories = gender_category_sales.sort_values(by=['product_gender_target', 'sold_count'], ascending=[True, False])
# Displayed results
print("Sales analysis by gender and product category:")
print(gender_category_sales)
print("\nTop-selling product categories for each gender:")
print(top_categories)
Sales analysis by gender and product category:
product_category product_gender_target sold_count total_count sold_rate
0 Men Accessories Men 919 98776 0.009304
1 Men Clothing Men 2744 193936 0.014149
2 Men Clothing Women 0 329 0.000000
3 Men Shoes Men 1620 125425 0.012916
4 Women Accessories Women 601 146018 0.004116
5 Women Clothing Women 3910 192197 0.020344
6 Women Shoes Women 3942 143833 0.027407
Top-selling product categories for each gender:
product_category product_gender_target sold_count total_count sold_rate
1 Men Clothing Men 2744 193936 0.014149
3 Men Shoes Men 1620 125425 0.012916
0 Men Accessories Men 919 98776 0.009304
6 Women Shoes Women 3942 143833 0.027407
5 Women Clothing Women 3910 192197 0.020344
4 Women Accessories Women 601 146018 0.004116
2 Men Clothing Women 0 329 0.000000
# Calculated the TRANSACTION RATE BY PRODUCT CATEGORY
# transforn "sold" as strings
df['sold'] = df['sold'].astype(str)
# Calculate the transaction volume of products in each category
category_sales = df.groupby('product_category')['sold'].agg(
sold_count=lambda x: (x == 'True').sum(),
unsold_count=lambda x: (x == 'False').sum()
).reset_index()
# Calculate the transaction rate
category_sales['sold_rate'] = category_sales['sold_count'] / (category_sales['sold_count'] + category_sales['unsold_count'])
#Adjust the order
category_sales = category_sales.sort_values(by='sold_count', ascending=False)
print(category_sales)
product_category sold_count unsold_count sold_rate
5 Women Shoes 3942 139891 0.027407
4 Women Clothing 3910 188287 0.020344
1 Men Clothing 2744 191521 0.014125
2 Men Shoes 1620 123805 0.012916
0 Men Accessories 919 97857 0.009304
3 Women Accessories 601 145417 0.004116
#1. Plot the top-selling product_type(bar chart).
category_sales.plot(x="product_category",
y=["sold_count"],
kind="bar",
color=["skyblue"],
edgecolor="black")
plt.title("Sold Count by Product Category")
plt.xlabel("Product Category")
plt.ylabel("soldcount")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
png
#2. Graph the market share of different brands using a bar chart (H).
total_transactions = best_selling_brand_name['transaction_count'].sum()
best_selling_brand_name['market_share'] = best_selling_brand_name['transaction_count'] / total_transactions
import matplotlib.pyplot as plt
best_selling_brand_name = best_selling_brand_name.head(10)
plt.figure(figsize=(10, 0.5 * len(best_selling_brand_name)))
plt.barh(best_selling_brand_name['brand_name'], best_selling_brand_name['market_share'], color='skyblue')
plt.xlabel('Market Share')
plt.ylabel('Brand')
plt.title('Top 10 Brand Market Share')
plt.xlim(0, 1)
plt.grid(axis='x', linestyle='--', alpha=0.3)
for i, value in enumerate(best_selling_brand_name['market_share']):
plt.text(value + 0.01, i, f"{value:.2%}", va='center')
plt.tight_layout()
plt.show()
png
#3. Create a scatter plot comparing product_like_count and sold to visualize conversion rates.
# Step 4: Plotting - One point per product category
plt.figure(figsize=(10, 6))
plt.scatter(product_stats['avg_like_count'], product_stats['sold_rate'], color='skyblue', s=100)
# Add labels for each product category
for i, row in product_stats.iterrows():
plt.text(row['avg_like_count'] + 1, row['sold_rate'], row['product_category'])
# Add reference lines for overall averages
plt.axvline(overall_avg_likes, color='red', linestyle='--', label='Overall Avg. Likes')
plt.axhline(overall_sold_rate, color='green', linestyle='--', label='Overall Avg. Sold Rate')
# Beautify the plot
plt.title('Product Categories: Likes vs. Conversion Rate')
plt.xlabel('Average Like Count')
plt.ylabel('Conversion Rate (Sold Rate)')
plt.grid(True, linestyle='--', alpha=0.4)
plt.legend()
plt.tight_layout()
# Display the plot
plt.show()
# Output product categories with high likes but low conversion rates
print("⚠️ Product categories with high likes but low conversion rates:")
print(low_conversion_products)
png
⚠️ Product categories with high likes but low conversion rates:
product_category avg_like_count sold_count total_count sold_rate
0 Men Accessories 6.477393 919 98776 0.009304
3 Women Accessories 11.504123 601 146018 0.004116
#4. Created a pie chart showing Distribution of Sold Products by Season.
plt.pie(season_sales["sold_count"],
labels=season_sales["product_season"],
autopct='%1.1f%%',
startangle=140,
colors=plt.cm.Pastel1.colors)
plt.title("Distribution of Sold Products by Season")
plt.show()
png
#1. Identified High-Potential Products based on the following criteria:
#High product_like_count but low sold numbers:
product_stats_brand = df.groupby('brand_name').agg(
avg_like_count=('product_like_count', 'mean'),
sold_count=('sold', lambda x: (x == 'True').sum()),
total_count=('sold', 'count')
).reset_index()
# Calculated sales rate for each category-gender combination
product_stats_brand['sold_rate'] = product_stats_brand['sold_count'] / product_stats_brand['total_count']
# Step 1: Filtered brands with high likes but low conversion rate (high potential)
low_conversion_products = product_stats_brand[
(product_stats_brand['avg_like_count'] > overall_avg_likes) &
(product_stats_brand['sold_rate'] < overall_sold_rate)
].copy()
# Step 2: Added gap columns to measure the potential level
low_conversion_products['like_gap'] = low_conversion_products['avg_like_count'] - overall_avg_likes
low_conversion_products['sold_rate_gap'] = overall_sold_rate - low_conversion_products['sold_rate']
# Step 3: Sorted by potential (biggest like gap and biggest drop in sold rate), select top 10
top10_high_potential = low_conversion_products.sort_values(
by=['like_gap', 'sold_rate_gap'],
ascending=[False, False]
).head(10)
# Step 4: Displayed the result
print("✅ Top 10 High-Potential Brands (High Likes, Low Conversion Rate):")
print(top10_high_potential[["brand_name", "avg_like_count", "sold_rate", "like_gap", "sold_rate_gap"]])
✅ Top 10 High-Potential Brands (High Likes, Low Conversion Rate):
brand_name avg_like_count sold_rate like_gap \
7597 Spinelli Kilcollin 91.000000 0.0 84.701674
4715 Little Liffner 81.500000 0.0 75.201674
7398 Shearling 79.000000 0.0 72.701674
7569 Sophie Buhai 62.000000 0.0 55.701674
361 Alabaster Industries 58.500000 0.0 52.201674
2375 ETOILE CORAL 54.000000 0.0 47.701674
4698 Lirika Matoshi 53.500000 0.0 47.201674
1697 Charlotte Knowles 52.833333 0.0 46.535007
8297 VAQUERA 50.500000 0.0 44.201674
5647 Mowalola 48.333333 0.0 42.035007
sold_rate_gap
7597 0.015254
4715 0.015254
7398 0.015254
7569 0.015254
361 0.015254
2375 0.015254
4698 0.015254
1697 0.015254
8297 0.015254
5647 0.015254
#2. Computed the Brand Performance Index using the formula:
#- Market Influence: (best_selling + product_like_count)
# best_selling_product_type:
best_selling_product_type = df["product_type"].value_counts().reset_index()
best_selling_product_type.columns = ['product_type', 'transaction_count']
# product_stats:
#Calculate the average number of likes and conversion rate for each product type
product_stats_type= df.groupby('product_type').agg(
avg_like_count=('product_like_count', 'mean'),
sold_count=('sold', lambda x: (x == 'True').sum()),
total_count_type=('sold', 'count')
).reset_index()
# Market Influence = transaction_count + avg_like_count
market_influence_df = pd.merge(
best_selling_product_type,
product_stats_type[['product_type', 'avg_like_count']],
on='product_type',
how='inner'
)
# Market Influence:
market_influence_df['market_influence'] = market_influence_df['transaction_count'] + market_influence_df['avg_like_count']
print("Market Influence for each product_type:")
print(market_influence_df[['product_type','market_influence']])
Market Influence for each product_type:
product_type market_influence
0 Sunglasses 42868.499662
1 Jacket 29502.949964
2 Silk tie 26105.791825
3 T-shirt 24933.860283
4 Shirt 21961.626788
... ... ...
10979 Roller Boat trainers 2.000000
10980 Zoom Fly glitter low trainers 1.000000
10981 Air Jordan 14 high trainers 1.000000
10982 VLTN top 2.000000
10983 Kitty cloth heels 5.000000
[10984 rows x 2 columns]
#The five product types with the highest like count in each gender group.
import pandas as pd
gender_product_like = df.groupby(['product_gender_target', 'product_type']).agg(
total_likes=('product_like_count', 'sum')
).reset_index()
gender_product_like = gender_product_like.sort_values(
by=['product_gender_target', 'total_likes'],
ascending=[True, False])
top5_by_gender = gender_product_like.groupby('product_gender_target').head(5)
print("Top 5 products by gender with highest like count:")
print(top5_by_gender[['product_gender_target', 'product_type', 'total_likes']])
Top 5 products by gender with highest like count:
product_gender_target product_type total_likes
3386 Men Sunglasses 112086.0
3420 Men T-shirt 94930.0
1943 Men Jacket 90567.0
2084 Men Leather low trainers 83927.0
3414 Men Sweatshirt 76106.0
10726 Women Sunglasses 209357.0
7802 Women Leather belt 132975.0
7892 Women Leather wallet 102619.0
7836 Women Leather heels 96385.0
11672 Women Watch 82062.0
# find highest like
max_likes = df['product_like_count'].max()
min_likes = df['product_like_count'].min()
print(f"max_likes: {max_likes}")
print(f"min_likes: {min_likes}")
max_likes: 3154.0
min_likes: 0.0
category_sales.plot(x="product_category",
y=[ "sold_rate"],
kind="bar",
color=["salmon"],
edgecolor="black")
plt.title("Sold Rate by Product Category")
plt.xlabel("Product Category")
plt.ylabel("Sold rate")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
png
plt.scatter(category_sales["sold_count"],
category_sales["sold_rate"],
color="teal")
plt.title("Sold Rate vs Sold Count by Category")
plt.xlabel("Sold Count")
plt.ylabel("Sold Rate")
plt.grid(True)
# mostrar las etiquetas de cada punto
for i, txt in enumerate(category_sales["product_category"]):
plt.annotate(txt, (category_sales["sold_count"].iloc[i],
category_sales["sold_rate"].iloc[i]),
fontsize=8, alpha=0.7)
png
gender_product_like = df.groupby(['product_gender_target', 'product_type']).agg(
total_likes=('product_like_count', 'sum')
).reset_index()
top_product_by_gender = gender_product_like.sort_values(
by=['product_gender_target', 'total_likes'],
ascending=[True, False])
print(top_product_by_gender)
product_gender_target product_type total_likes
3386 Men Sunglasses 112086.0
3420 Men T-shirt 94930.0
1943 Men Jacket 90567.0
2084 Men Leather low trainers 83927.0
3414 Men Sweatshirt 76106.0
... ... ... ...
11712 Women Wonderland ankle boots 0.0
11819 Women Youth buckled boots 0.0
11842 Women Zen leather trainers 0.0
11845 Women Zen vegan leather trainers 0.0
11894 Women Zoé vinyl wallet 0.0
[11911 rows x 3 columns]
gender_product_like = gender_product_like.sort_values(
by=['product_gender_target', 'total_likes'],
ascending=[True, True])
bottom5_by_gender = gender_product_like.groupby('product_gender_target').head(5)
print("Bottom 5 products by gender with lowest like count:")
print(bottom5_by_gender[['product_gender_target', 'product_type', 'total_likes']])
Bottom 5 products by gender with lowest like count:
product_gender_target product_type \
0 Men leather
19 Men 1461 (3 eye) patent leather lace ups
20 Men 1461 (3 eye) vegan leather lace ups
63 Men 420 cloth trainers
94 Men 514 slim jean
3964 Women 'Foster' Lace-Up Ballet leather ballet flats
3968 Women 101 (6 eye) leather lace up boots
3971 Women 101 (6 eye) vegan leather lace up boots
3979 Women 1460 Pascal (8 eye) exotic leathers boots
3994 Women 1460 Pascal (8 eye) vegan leather biker boots
total_likes
0 0.0
19 0.0
20 0.0
63 0.0
94 0.0
3964 0.0
3968 0.0
3971 0.0
3979 0.0
3994 0.0
df
| product_id | product_type | product_name | product_description | product_keywords | product_gender_target | product_category | product_season | product_condition | product_like_count | … | warehouse_name | seller_id | seller_username | usually_ships_within | seller_country | seller_products_sold | seller_num_products_listed | seller_community_rank | seller_num_followers | seller_pass_rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 43247626 | Wool mini skirt | Wool mini skirt Miu Miu Grey size S Internatio… | Miu Miu – Pleated mini skirt Size: 36 (S) Wai… | Miu Miu Wool Skirts | Women | Women Clothing | Autumn / Winter | Never worn | 34.0 | … | Tourcoing | 25775970 | vitalii25775970 | 1-2 days | Germany | 3.0 | 14.0 | 0.0 | 13.0 | 0.0 |
| 1 | 43247441 | Jacket | Jacket Barbara Bui Navy size 42 FR in Cotton | For selling nice women’s suit Barbara Bui size… | Barbara Bui Cotton Jackets | Women | Women Clothing | All seasons | Very good condition | 1.0 | … | Tourcoing | 13698770 | olivia13698770 | 1-2 days | Belgium | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 |
| 2 | 43246517 | Wool coat | Wool coat Comme Des Garcons White size S Inter… | Magnificent boiled wool coat. I bought it in t… | Comme Des Garcons Wool Coats | Women | Women Clothing | Autumn / Winter | Very good condition | 2.0 | … | Tourcoing | 6042365 | cecilia6042365 | 1-2 days | Spain | 58.0 | 69.0 | 0.0 | 62.0 | 96.0 |
| 3 | 43246507 | Mini skirt | Mini skirt MSGM Black size 38 IT in Polyester | MSGM Skirt Black Printed Raw-Edge & Embroidere… | MSGM Polyester Skirts | Women | Women Clothing | All seasons | Very good condition | 0.0 | … | Brooklyn | 13172949 | gretchen13172949 | 1-2 days | United States | 63.0 | 274.0 | 126346.0 | 131.0 | 96.0 |
| 4 | 43246417 | Vegan leather trousers | Vegan leather trousers LVIR Black size 36 FR i… | LVIR black grained faux leather trousers size … | LVIR Vegan leather Trousers | Women | Women Clothing | All seasons | Very good condition | 1.0 | … | Crawley | 2578605 | crunchykat | 3-5 days | United Kingdom | 19.0 | 14.0 | 102821.0 | 40.0 | 89.0 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| 900509 | 41538360 | Glitter trainers | Glitter trainers Bally Gold size 38 EU in Glit… | Bally Vita Parcours sneakers. PPleather, suede… | Bally Glitter Trainers | Women | Women Shoes | All seasons | Good condition | 1.0 | … | Tourcoing | 8849230 | lauragua | 1-2 days | Italy | 40.0 | 17.0 | 391778.0 | 104.0 | 100.0 |
| 900510 | 41532271 | Leather heels | Leather heels Casadei Black size 38.5 EU in Le… | Trendy and classic Casadei high-heel pumps Mad… | Casadei Leather Heels | Women | Women Shoes | All seasons | Good condition | 1.0 | … | Tourcoing | 5448248 | bettina5448248 | 1-2 days | Austria | 26.0 | 99.0 | 51408.0 | 75.0 | 89.0 |
| 900511 | 41538140 | Leather cowboy boots | Leather cowboy boots Ash Black size 36 EU in L… | Very good quality leather boots Worn once Elas… | Ash Leather Boots | Women | Women Shoes | All seasons | Very good condition | 0.0 | … | Tourcoing | 9347694 | sylvie9347694 | 1-2 days | France | 0.0 | 2.0 | 0.0 | 3.0 | 0.0 |
| 900512 | 41537603 | Leather ballet flats | Leather ballet flats Lauren Ralph Lauren Black… | Very beautiful ballet flats like new. I don’t … | Lauren Ralph Lauren Leather Ballet flats | Women | Women Shoes | All seasons | Very good condition | 27.0 | … | Tourcoing | 24074881 | marina24074881 | 1-2 days | Italy | 2.0 | 7.0 | 0.0 | 11.0 | 100.0 |
| 900513 | 41534820 | J’adior cloth sandal | J’adior cloth sandal Dior Beige size 38 EU in … | Dior J’adior slingback sandals pumps New, nev… | Dior J’adior Sandals | Women | Women Shoes | Spring / Summer | Never worn | 47.0 | … | Tourcoing | 24557483 | shop24557483 | 1-2 days | Romania | 66.0 | 354.0 | 0.0 | 195.0 | 75.0 |
900514 rows × 36 columns