Objectives

To apply functions and loops in data science tasks
To simulate real-world datasets (sales & company data)
To use conditional logic and data transformation
To perform analysis and create visualizations
To build an automated data science workflow

1. Dynamic Multi-Formula (Improved)

import numpy as np
import matplotlib.pyplot as plt
def compute_formula(x, formula_type):
    """
    Evaluate one of the built-in formulas at ``x``.

    Supported ``formula_type`` values:

    * ``"linear"``      -> ``2*x + 3``
    * ``"quadratic"``   -> ``x**2 + 2*x + 1``
    * ``"cubic"``       -> ``x**3 - x**2 + 2``
    * ``"exponential"`` -> ``exp(0.2*x)``

    Args:
        x: A number, a NumPy array, or a list/tuple of numbers. Lists and
            tuples are converted to arrays so the arithmetic is applied
            element-wise.
        formula_type: Name of the formula to evaluate.

    Returns:
        The formula evaluated at ``x`` (element-wise for array input).

    Raises:
        ValueError: If ``formula_type`` is not one of the supported names.
    """
    # Plain sequences do not do element-wise arithmetic (``2 * [1, 2]``
    # repeats the list), so promote them to arrays first.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)

    if formula_type == "linear":
        return 2 * x + 3
    if formula_type == "quadratic":
        return x**2 + 2 * x + 1
    if formula_type == "cubic":
        return x**3 - x**2 + 2
    if formula_type == "exponential":
        return np.exp(0.2 * x)
    raise ValueError("Invalid formula type")
def plot_formulas():
    """
    Plot every supported formula over x = 1..20 on a single figure.

    One line is drawn per formula name, labelled with that name, and the
    figure is shown immediately. Nothing is returned.
    """
    x_values = np.arange(1, 21)
    formula_names = ["linear", "quadratic", "cubic", "exponential"]

    plt.figure()
    for name in formula_names:
        plt.plot(x_values, compute_formula(x_values, name), label=name)

    plt.title("Comparison of Multiple Mathematical Functions")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.legend()
    # A bare plt.grid() *toggles* the grid, which would switch it off when
    # the active style already enables it; request it explicitly instead.
    plt.grid(True)
    plt.show()
# Draw the formula comparison chart for this section.
plot_formulas()

Logo

2. Sales Simulation (Improved + Insight)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def simulate_sales(n_salesperson, days, seed=None):
    """
    Simulate daily sales for several salespeople, with a tiered discount.

    Every salesperson gets one random sale amount per day (an integer from
    100 to 999). The discount rate depends on the amount: 0.2 above 700,
    0.1 above 400, otherwise 0.05. The discount is recorded only; it is not
    subtracted from the amount or from the running total.

    Args:
        n_salesperson: Number of salespeople; ids run from 0 upwards.
        days: Number of days simulated per salesperson; days run from 0.
        seed: Optional seed for a reproducible simulation. When omitted,
            NumPy's global random state is used, exactly as before.

    Returns:
        A DataFrame with one row per (salesperson, day) and the columns
        ``sales_id``, ``day``, ``sales_amount``, ``discount`` and
        ``cumulative_sales`` (running total per salesperson).
    """
    # RandomState(seed) replays the same stream as np.random.seed(seed), so a
    # seeded call matches what the unseeded code produces after seeding.
    rng = np.random if seed is None else np.random.RandomState(seed)

    data = []
    for s in range(n_salesperson):
        cumulative = 0

        for d in range(days):
            sales = rng.randint(100, 1000)

            # Conditional discount
            if sales > 700:
                discount = 0.2
            elif sales > 400:
                discount = 0.1
            else:
                discount = 0.05

            cumulative += sales
            data.append([s, d, sales, discount, cumulative])

    return pd.DataFrame(
        data,
        columns=["sales_id", "day", "sales_amount", "discount", "cumulative_sales"],
    )

def analyze_sales(df):
    """
    Print total sales per salesperson and plot their cumulative sales.

    Args:
        df: Sales data with the columns ``sales_id``, ``day``,
            ``sales_amount`` and ``cumulative_sales``.

    Returns:
        None. The summary is printed and the chart is shown.
    """
    # Summary insight
    print(" Sales Summary:")
    print(df.groupby("sales_id")["sales_amount"].sum())

    # Salesperson ids are labels, not quantities. Left numeric, seaborn maps
    # the hue to a continuous colour ramp; a categorical copy gives each
    # salesperson a distinct colour and legend entry. The caller's frame is
    # left untouched.
    plot_df = df.assign(sales_id=df["sales_id"].astype("category"))

    # Plot
    sns.lineplot(data=plot_df, x="day", y="cumulative_sales", hue="sales_id")
    plt.title("Cumulative Sales per Salesperson")
    plt.show()

# Simulate 5 salespeople over 30 days, then print the totals and plot the
# cumulative sales curves.
sales_df = simulate_sales(5, 30)
analyze_sales(sales_df)


Sales Summary:
sales_id
0    18195
1    17570
2    15307
3    15634
4    14961

Name: sales_amount, dtype: int64

Logo

3. Performance Categorization (Improved)

def categorize_performance(sales):
    """
    Map each sale amount to a performance label.

    Labels use strict lower bounds: above 800 "Excellent", above 600
    "Very Good", above 400 "Good", above 200 "Average", otherwise "Poor".

    Args:
        sales: Iterable of numeric sale amounts (list, array, Series, ...).

    Returns:
        A list of labels, one per input value, in input order.
    """
    # Ordered from the highest bound down so the first match wins.
    tiers = (
        (800, "Excellent"),
        (600, "Very Good"),
        (400, "Good"),
        (200, "Average"),
    )

    def label_for(amount):
        for lower_bound, label in tiers:
            if amount > lower_bound:
                return label
        return "Poor"

    return [label_for(amount) for amount in sales]

def plot_performance(df):
    """
    Report how the performance categories are distributed.

    Prints each category's share of the rows as a percentage (rounded to
    two decimals, relative to ``len(df)``), then shows a bar chart and a
    pie chart of the category counts.

    Args:
        df: Data with a ``category`` column.

    Returns:
        None.
    """
    counts = df["category"].value_counts()

    print("\n Category Percentage:")
    print((counts / len(df) * 100).round(2))

    # Bar
    counts.plot(kind="bar", title="Performance Distribution")
    plt.show()

    # Pie
    counts.plot(kind="pie", autopct='%1.1f%%')
    # pandas labels the pie's y-axis with the series name, which only adds
    # clutter beside the chart; clear it.
    plt.ylabel("")
    plt.title("Performance Share")
    plt.show()

# Label every simulated sale with its performance tier, then report how the
# tiers are distributed.
sales_df["category"] = categorize_performance(sales_df["sales_amount"])
plot_performance(sales_df)

Category Percentage:
category
Very Good    27.33
Good         21.33
Excellent    20.00
Average      17.33
Poor         14.00
Name: count, dtype: float64

Logo

4. Company Dataset (Improved)

def generate_company_data(n_company, n_employees, seed=None):
    """
    Generate a random employee table for several companies.

    Each employee gets a salary (integer from 3000 to 14999), a performance
    score and a KPI score (integers from 50 to 99), and a department chosen
    from HR, IT, Finance and Marketing.

    Args:
        n_company: Number of companies; ids run from 0 upwards.
        n_employees: Employees per company. Employee ids restart at 0 for
            every company.
        seed: Optional seed for a reproducible dataset. When omitted,
            NumPy's global random state is used, exactly as before.

    Returns:
        A DataFrame with one row per employee and the columns
        ``company_id``, ``employee_id``, ``salary``, ``department``,
        ``performance_score`` and ``KPI_score``.
    """
    # RandomState(seed) replays the same stream as np.random.seed(seed), so a
    # seeded call matches what the unseeded code produces after seeding.
    rng = np.random if seed is None else np.random.RandomState(seed)
    departments = ["HR", "IT", "Finance", "Marketing"]

    data = []
    for c in range(n_company):
        for e in range(n_employees):
            # Draw order is kept fixed so seeded runs stay comparable.
            salary = rng.randint(3000, 15000)
            performance = rng.randint(50, 100)
            kpi = rng.randint(50, 100)
            dept = rng.choice(departments)

            data.append([c, e, salary, dept, performance, kpi])

    return pd.DataFrame(
        data,
        columns=[
            "company_id", "employee_id", "salary",
            "department", "performance_score", "KPI_score",
        ],
    )

def company_summary(df):
    """
    Print per-company aggregates of the employee table.

    For every ``company_id`` the summary shows the mean salary, the mean
    performance score and the maximum KPI score.

    Args:
        df: Employee data with the columns ``company_id``, ``salary``,
            ``performance_score`` and ``KPI_score``.

    Returns:
        None. The summary table is printed.
    """
    aggregations = {
        "salary": "mean",
        "performance_score": "mean",
        "KPI_score": "max",
    }
    summary = df.groupby("company_id").agg(aggregations)

    print("\n Company Summary:")
    print(summary)

# Generate 5 companies with 100 employees each and print the per-company
# summary.
company_df = generate_company_data(5, 100)
company_summary(company_df)

Company Summary:
             salary  performance_score  KPI_score
company_id                                       
0           8544.81              72.81         99
1           8773.76              74.91         99
2           8626.85              75.82         99
3           8691.17              76.15         99
4           8755.02              75.70         99

5. Monte Carlo π (Improved Visualization)

def monte_carlo_pi(n_points, seed=None):
    """
    Estimate pi by sampling random points in the unit square.

    Points are drawn uniformly from [0, 1) x [0, 1). The fraction that lands
    inside the quarter circle ``x**2 + y**2 <= 1`` approximates pi / 4. The
    estimate is printed and the points are shown in a scatter plot, split
    into inside and outside.

    Args:
        n_points: Number of random points to draw; must be at least 1.
        seed: Optional seed for a reproducible run. When omitted, NumPy's
            global random state is used, exactly as before.

    Returns:
        None.

    Raises:
        ValueError: If ``n_points`` is smaller than 1.
    """
    # With zero points the estimate is 0 / 0, which would silently print
    # "nan"; reject the input up front instead.
    if n_points < 1:
        raise ValueError("n_points must be a positive integer")

    rng = np.random if seed is None else np.random.RandomState(seed)
    x = rng.rand(n_points)
    y = rng.rand(n_points)

    inside = (x**2 + y**2) <= 1
    pi_estimate = 4 * np.sum(inside) / n_points

    print(f"Estimated Pi: {pi_estimate}")

    plt.scatter(x[inside], y[inside], s=1, label="Inside")
    plt.scatter(x[~inside], y[~inside], s=1, label="Outside")
    plt.legend()
    plt.title("Monte Carlo Simulation of Pi")
    plt.show()

# Estimate pi from 10,000 random points.
monte_carlo_pi(10000)

Estimated Pi: 3.1164

Logo

6. Feature Engineering (Upgrade)

def add_features(df):
    """
    Add binned category columns for performance score and salary.

    Two columns are added to ``df`` in place:

    * ``performance_category`` bins ``performance_score`` at
      0-60 / 60-70 / 70-80 / 80-90 / 90-100 into "Poor", "Average", "Good",
      "Very Good" and "Excellent".
    * ``salary_bracket`` bins ``salary`` at
      0-5000 / 5000-8000 / 8000-12000 / 12000-20000 into "Low", "Medium",
      "High" and "Very High".

    Each bin includes its upper edge. The first bin also includes 0. Values
    outside the outermost edges become NaN.

    Args:
        df: Employee data with ``performance_score`` and ``salary`` columns.

    Returns:
        The same DataFrame object, with the two columns added.
    """
    # Performance category. include_lowest keeps a score of exactly 0 in the
    # first bin instead of turning it into NaN.
    df["performance_category"] = pd.cut(
        df["performance_score"],
        bins=[0, 60, 70, 80, 90, 100],
        labels=["Poor", "Average", "Good", "Very Good", "Excellent"],
        include_lowest=True,
    )

    # Salary bracket, with the same treatment for a salary of exactly 0.
    df["salary_bracket"] = pd.cut(
        df["salary"],
        bins=[0, 5000, 8000, 12000, 20000],
        labels=["Low", "Medium", "High", "Very High"],
        include_lowest=True,
    )

    return df

# Attach the derived category columns to the company data.
company_df = add_features(company_df)

*Visualization*
# Compare the KPI score distribution across the salary brackets.
sns.boxplot(
    x="salary_bracket",
    y="KPI_score",
    data=company_df,
).set_title("KPI by Salary Bracket")
plt.show()

Logo

7. Mini Project (Advanced)

def mini_dashboard(df, kpi_threshold=90):
    """
    Print the top-performer count and show three overview charts.

    The charts are: salary against KPI score with a regression line, KPI
    score per department as a bar chart, and the salary distribution as a
    histogram with a density curve.

    Args:
        df: Employee data with the columns ``KPI_score``, ``salary`` and
            ``department``.
        kpi_threshold: An employee counts as a top performer when the KPI
            score is strictly greater than this value. Defaults to 90.

    Returns:
        None.
    """
    # Top performers
    top = df[df["KPI_score"] > kpi_threshold]

    print("\n Top Performers Count:", len(top))

    # Scatter + regression
    sns.regplot(data=df, x="salary", y="KPI_score")
    plt.title("Salary vs KPI Relationship")
    plt.show()

    # Department comparison
    sns.barplot(data=df, x="department", y="KPI_score")
    plt.title("Average KPI per Department")
    plt.show()

    # Salary distribution
    sns.histplot(df["salary"], kde=True)
    plt.title("Salary Distribution")
    plt.show()

# Render the dashboard for the simulated company data.
mini_dashboard(company_df)

Top Performers Count: 76

Logo

Functions & Loops + Data Science

Week 5

2026-04-06

Profile Author

Introduction

Objectives

1. Dynamic Multi-Formula (Improved)

2. Sales Simulation (Improved + Insight)

3. Performance Categorization (Improved)

4. Company Dataset (Improved)

5. Monte Carlo π (Improved Visualization)

6. Feature Engineering (Upgrade)

7. Mini Project (Advanced)

Conclusion