Author Profile

Naila Syahrani Putri

Data Science

Institut Teknologi Sains Bandung Logo

Introduction

Data science plays an important role in modern decision-making by transforming raw data into meaningful insights. This project focuses on the use of functions and loops to simulate real-world data scenarios, perform analysis, and create visualizations. The practicum aims to develop programming skills and demonstrate how automated workflows can support data-driven solutions.

Objectives

1. Dynamic Multi-Formula (Improved)

import numpy as np
import matplotlib.pyplot as plt
def compute_formula(x, formula_type):
    """Evaluate one of the supported mathematical formulas at ``x``.

    Args:
        x: A number or NumPy array. Every formula is built from arithmetic
            operators and ``np.exp``, so arrays are evaluated element-wise.
        formula_type: Name of the formula to apply: ``"linear"``,
            ``"quadratic"``, ``"cubic"`` or ``"exponential"``.

    Returns:
        The value of the selected formula at ``x``.

    Raises:
        ValueError: If ``formula_type`` is not one of the supported names.
    """
    if formula_type == "linear":
        return 2 * x + 3
    if formula_type == "quadratic":
        return x**2 + 2 * x + 1
    if formula_type == "cubic":
        return x**3 - x**2 + 2
    if formula_type == "exponential":
        return np.exp(0.2 * x)
    raise ValueError("Invalid formula type")
def plot_formulas():
    """Plot all four supported formulas for x = 1..20 on a single figure.

    Each curve is computed with ``compute_formula`` and labelled with its
    formula name. The figure gets a title, axis labels, a legend and a grid,
    and is displayed with ``plt.show()``. Nothing is returned.
    """
    x = np.arange(1, 21)
    formulas = ["linear", "quadratic", "cubic", "exponential"]

    plt.figure()
    for f in formulas:
        y = compute_formula(x, f)
        plt.plot(x, y, label=f)

    plt.title("Comparison of Multiple Mathematical Functions")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.legend()
    plt.grid()
    plt.show()
# Draw the comparison chart of the four formula types.
plot_formulas()

Logo

2. Sales Simulation (Improved + Insight)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def simulate_sales(n_salesperson, days, seed=None):
    """Simulate daily sales per salesperson, with a tiered discount rate.

    For every salesperson and day a random integer sale amount in
    ``[100, 1000)`` is drawn. The discount rate depends on the amount
    (``> 700`` -> 0.2, ``> 400`` -> 0.1, otherwise 0.05), and a running total
    of the raw sale amounts is kept per salesperson.

    Args:
        n_salesperson: Number of salespeople to simulate (ids ``0..n-1``).
        days: Number of days to simulate per salesperson (``0..days-1``).
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the global ``np.random`` state is used, exactly as
            before this parameter existed.

    Returns:
        A ``pandas.DataFrame`` with one row per (salesperson, day) and the
        columns ``sales_id``, ``day``, ``sales_amount``, ``discount`` and
        ``cumulative_sales``.
    """
    # RandomState uses the same legacy generator as the np.random module
    # functions, so a seeded run matches np.random.seed(seed) + the old code.
    rng = np.random if seed is None else np.random.RandomState(seed)
    data = []

    for s in range(n_salesperson):
        cumulative = 0  # running total restarts for each salesperson

        for d in range(days):
            sales = rng.randint(100, 1000)

            # Conditional discount
            if sales > 700:
                discount = 0.2
            elif sales > 400:
                discount = 0.1
            else:
                discount = 0.05

            # The running total accumulates the raw amount; the discount is
            # recorded alongside it but not subtracted.
            cumulative += sales

            data.append([s, d, sales, discount, cumulative])

    df = pd.DataFrame(data, columns=[
        "sales_id", "day", "sales_amount", "discount", "cumulative_sales"
    ])

    return df

def analyze_sales(df):
    """Print total sales per salesperson and plot cumulative sales over time.

    Args:
        df: DataFrame with the columns ``sales_id``, ``sales_amount``,
            ``day`` and ``cumulative_sales`` (the columns read below).

    Returns:
        None. The summary is printed and the chart is shown with
        ``plt.show()``.
    """
    # Summary insight
    print(" Sales Summary:")
    print(df.groupby("sales_id")["sales_amount"].sum())

    # Plot: one line per salesperson, day on the x axis.
    sns.lineplot(data=df, x="day", y="cumulative_sales", hue="sales_id")
    plt.title("Cumulative Sales per Salesperson")
    plt.show()

# Simulate 30 days of sales for 5 salespeople, then print the summary and plot it.
sales_df = simulate_sales(5, 30)
analyze_sales(sales_df)


Sales Summary:
sales_id
0    18195
1    17570
2    15307
3    15634
4    14961

Name: sales_amount, dtype: int64

Logo

3. Performance Categorization (Improved)

def categorize_performance(sales):
    """Map each sale amount to a performance label.

    A value is assigned the first tier whose lower bound it strictly exceeds:
    ``> 800`` Excellent, ``> 600`` Very Good, ``> 400`` Good, ``> 200``
    Average; anything else is Poor.

    Args:
        sales: Iterable of numeric sale amounts (list, Series, array, ...).

    Returns:
        A list of label strings, one per input value, in input order.
    """
    # Ordered from the highest bound down so the first match wins.
    tiers = (
        (800, "Excellent"),
        (600, "Very Good"),
        (400, "Good"),
        (200, "Average"),
    )
    return [
        next((label for bound, label in tiers if s > bound), "Poor")
        for s in sales
    ]

def plot_performance(df):
    """Print and plot how the rows are distributed over performance categories.

    Args:
        df: DataFrame with a ``category`` column.

    Returns:
        None. The percentage of rows per category is printed (rounded to two
        decimals), then a bar chart and a pie chart of the counts are shown.
    """
    counts = df["category"].value_counts()

    print("\n Category Percentage:")
    print((counts / len(df) * 100).round(2))

    # Bar
    counts.plot(kind="bar", title="Performance Distribution")
    plt.show()

    # Pie
    counts.plot(kind="pie", autopct='%1.1f%%')
    plt.title("Performance Share")
    plt.show()

# Label every simulated sale with a performance tier, then report the tier distribution.
sales_df["category"] = categorize_performance(sales_df["sales_amount"])
plot_performance(sales_df)

Category Percentage:
category
Very Good    27.33
Good         21.33
Excellent    20.00
Average      17.33
Poor         14.00
Name: count, dtype: float64

Logo

Logo

4. Company Dataset (Improved)

def generate_company_data(n_company, n_employees, seed=None):
    """Generate a random employee dataset for several companies.

    For every employee a salary in ``[3000, 15000)``, a performance score and
    a KPI score in ``[50, 100)`` and a department (HR, IT, Finance or
    Marketing) are drawn at random.

    Args:
        n_company: Number of companies (ids ``0..n_company-1``).
        n_employees: Number of employees per company. Employee ids restart at
            0 for each company, so they are only unique within a company.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the global ``np.random`` state is used, exactly as
            before this parameter existed.

    Returns:
        A ``pandas.DataFrame`` with one row per employee and the columns
        ``company_id``, ``employee_id``, ``salary``, ``department``,
        ``performance_score`` and ``KPI_score``.
    """
    # RandomState uses the same legacy generator as the np.random module
    # functions, so a seeded run matches np.random.seed(seed) + the old code.
    rng = np.random if seed is None else np.random.RandomState(seed)
    departments = ["HR", "IT", "Finance", "Marketing"]
    data = []

    for c in range(n_company):
        for e in range(n_employees):
            # Draw order (salary, performance, KPI, department) is kept fixed
            # so seeded output stays stable.
            salary = rng.randint(3000, 15000)
            performance = rng.randint(50, 100)
            kpi = rng.randint(50, 100)
            dept = rng.choice(departments)

            data.append([c, e, salary, dept, performance, kpi])

    df = pd.DataFrame(data, columns=[
        "company_id", "employee_id", "salary",
        "department", "performance_score", "KPI_score"
    ])

    return df

def company_summary(df):
    """Print per-company aggregates of salary, performance and KPI.

    Args:
        df: DataFrame with the columns ``company_id``, ``salary``,
            ``performance_score`` and ``KPI_score``.

    Returns:
        None. The table (mean salary, mean performance score and maximum KPI
        score per company) is printed.
    """
    summary = df.groupby("company_id").agg({
        "salary": "mean",
        "performance_score": "mean",
        "KPI_score": "max"
    })

    print("\n Company Summary:")
    print(summary)

# Build a dataset of 5 companies with 100 employees each and print its per-company summary.
company_df = generate_company_data(5, 100)
company_summary(company_df)

Company Summary:
salary  performance_score  KPI_score
company_id                                       
0           8544.81              72.81         99
1           8773.76              74.91         99
2           8626.85              75.82         99
3           8691.17              76.15         99
4           8755.02              75.70         99

5. Monte Carlo π (Improved Visualization)

def monte_carlo_pi(n_points):
    """Estimate pi by random sampling of the unit square and plot the points.

    ``n_points`` points are drawn with ``np.random.rand`` and a point counts
    as "inside" when ``x**2 + y**2 <= 1``. The estimate is four times the
    fraction of inside points.

    Args:
        n_points: Number of random points to draw.

    Returns:
        None. The estimate is printed and a scatter plot of inside and
        outside points is shown.
    """
    x = np.random.rand(n_points)
    y = np.random.rand(n_points)

    # Boolean mask of the points that fall inside the quarter circle.
    inside = (x**2 + y**2) <= 1
    pi_estimate = 4 * np.sum(inside) / n_points

    print(f"Estimated Pi: {pi_estimate}")

    plt.scatter(x[inside], y[inside], s=1, label="Inside")
    plt.scatter(x[~inside], y[~inside], s=1, label="Outside")
    plt.legend()
    plt.title("Monte Carlo Simulation of Pi")
    plt.show()

# Run the simulation with 10,000 random points.
monte_carlo_pi(10000)

Estimated Pi: 3.1164

Logo

6. Feature Engineering (Upgrade)

def add_features(df):
    """Add categorical performance and salary columns to ``df``.

    The input frame is modified in place and also returned.

    Args:
        df: DataFrame with numeric ``performance_score`` and ``salary``
            columns.

    Returns:
        The same DataFrame with two new categorical columns:
        ``performance_category`` (Poor / Average / Good / Very Good /
        Excellent) and ``salary_bracket`` (Low / Medium / High / Very High).
        Values outside the bin ranges are left as NaN by ``pd.cut``.
    """
    # Performance category (derived from performance_score, not KPI_score).
    # Bins are right-inclusive; include_lowest=True puts a score of exactly 0
    # in the first bin instead of leaving it uncategorised.
    df["performance_category"] = pd.cut(
        df["performance_score"],
        bins=[0, 60, 70, 80, 90, 100],
        labels=["Poor", "Average", "Good", "Very Good", "Excellent"],
        include_lowest=True,
    )

    # Salary bracket, with the same edge handling as above.
    df["salary_bracket"] = pd.cut(
        df["salary"],
        bins=[0, 5000, 8000, 12000, 20000],
        labels=["Low", "Medium", "High", "Very High"],
        include_lowest=True,
    )

    return df

# Attach the performance_category and salary_bracket columns to the company dataset.
company_df = add_features(company_df)

*Visualization*
# Box plot of KPI_score for each salary bracket.
sns.boxplot(data=company_df, x="salary_bracket", y="KPI_score")
plt.title("KPI by Salary Bracket")
plt.show()

Logo

7. Mini Project (Advanced)

def mini_dashboard(df):
    """Print the number of top performers and show three overview charts.

    Args:
        df: DataFrame with the columns ``KPI_score``, ``salary`` and
            ``department``.

    Returns:
        None. The count of rows with ``KPI_score`` strictly above 90 is
        printed, then a salary-vs-KPI regression plot, a per-department KPI
        bar plot and a salary histogram with a KDE curve are shown in turn.
    """
    # Top performers
    top = df[df["KPI_score"] > 90]

    print("\n Top Performers Count:", len(top))

    # Scatter + regression
    sns.regplot(data=df, x="salary", y="KPI_score")
    plt.title("Salary vs KPI Relationship")
    plt.show()

    # Department comparison
    sns.barplot(data=df, x="department", y="KPI_score")
    plt.title("Average KPI per Department")
    plt.show()

    # Salary distribution
    sns.histplot(df["salary"], kde=True)
    plt.title("Salary Distribution")
    plt.show()

# Show the dashboard for the generated company dataset.
mini_dashboard(company_df)

Top Performers Count: 76

Logo

Logo

Logo

Conclusion

This practicum shows how functions, loops, and data science techniques can be used to process data efficiently. The project demonstrates data simulation, analysis, and visualization to generate useful insights. Overall, it helps build programming skills and understanding of real-world data science workflows.