Assignment Week 5 ~ Advanced Practicum: Functions & Loops + Data Science

Chelsea Tesalonika
Patricia Hutajulu

52250041
Student

Bakti Siregar, M.Sc., CDS

Lecturer

Major : Data Science at ITSB

Data Science Programming

1. Multi-Formula Mathematical Modeling and Function Implementation

Conceptual Overview

| Formula     | Expression |
|-------------|------------|
| Linear      | $y = x$    |
| Quadratic   | $y = x^2$  |
| Cubic       | $y = x^3$  |
| Exponential | $y = e^x$  |

Function Implementation

# Evaluate one of the supported formulas at x.
#
# Arguments:
#   x        Numeric value (or vector) the formula is applied to.
#   formula  Formula name: "linear", "quadratic", "cubic" or "exponential".
#
# Returns x, x^2, x^3 or exp(x) depending on `formula`; stops with an error
# listing the accepted names when `formula` is not one of them.
compute_formula <- function(x, formula) {
  # One transformation per supported formula name
  transforms <- list(
    linear      = function(v) v,
    quadratic   = function(v) v^2,
    cubic       = function(v) v^3,
    exponential = function(v) exp(v)
  )
  supported <- names(transforms)

  # Guard clause: reject anything that is not a known formula name
  if (!formula %in% supported) {
    stop(paste(
      "Invalid formula! Choose:",
      paste(supported, collapse = ", ")
    ))
  }

  transforms[[formula]](x)
}

library(reactable)
library(htmltools)

# Example output table: one sample input per formula
example_df <- data.frame(
  Input = c(5, 2, 3, 1),
  Formula = c("quadratic", "exponential", "cubic", "linear")
)

# Evaluate each (Input, Formula) pair row by row; as.character() keeps the
# formula names plain strings even if the column was stored as a factor
example_df$Output <- mapply(
  compute_formula,
  example_df$Input,
  as.character(example_df$Formula),
  USE.NAMES = FALSE
)

# Theme for the example table: light-blue bold header, 14px row text
example_table_theme <- reactableTheme(
  headerStyle = list(background = "#E3F2FD", fontWeight = "bold"),
  rowStyle = list(fontSize = "14px")
)

# Searchable, centre-aligned, non-wrapping table of the example results
example_table <- reactable(
  example_df,
  defaultColDef = colDef(align = "center"),
  defaultPageSize = 5,
  searchable = TRUE,
  resizable = TRUE,
  highlight = TRUE,
  striped = TRUE,
  bordered = TRUE,
  wrap = FALSE,
  fullWidth = TRUE,
  theme = example_table_theme
)

htmltools::browsable(example_table)

Results

# Evaluate every formula on x = 1..20, storing one result column per formula
x_values <- 1:20
formulas <- c("linear", "quadratic", "cubic", "exponential")

results <- data.frame(x = x_values)

# compute_formula() applies its formula to the whole x vector at once, so a
# single call per formula is enough; this replaces the element-by-element
# sapply(), whose return type depends on its input.
for (f in formulas) {
  results[[f]] <- compute_formula(x_values, f)
}

# Theme for the results table: blue centred header, 14px rows, pale stripes
results_table_theme <- reactableTheme(
  headerStyle = list(
    background = "#DCEEFF",
    fontWeight = "bold",
    textAlign = "center"
  ),
  rowStyle = list(fontSize = "14px"),
  stripedColor = "#F7FBFF"
)

# Paginated (5 rows per page by default), searchable table of all results
results_table <- reactable(
  results,
  defaultColDef = colDef(align = "center"),
  pagination = TRUE,
  defaultPageSize = 5,
  showPageSizeOptions = TRUE,
  searchable = TRUE,
  resizable = TRUE,
  highlight = TRUE,
  striped = TRUE,
  bordered = TRUE,
  fullWidth = TRUE,
  theme = results_table_theme
)

htmltools::browsable(results_table)

Visualization

library(tidyverse)
library(plotly)

# Input grid for the plot
x <- 1:20

# Wide table: one column per formula, derived from x
results <- tibble(x = x) %>%
  mutate(
    linear      = x,
    quadratic   = x^2,
    cubic       = x^3,
    exponential = exp(x)
  )

# Long format: one row per (x, Formula) pair with its value in y
results_long <- pivot_longer(
  results,
  cols = -x,
  names_to = "Formula",
  values_to = "y"
)

# Keep only points with y <= 1000 so the chart stays readable
results_trimmed <- filter(results_long, y <= 1000)

# Line and marker colour for each formula
colors <- c(
  linear      = "#2196F3",
  quadratic   = "#4CAF50",
  cubic       = "#FF5722",
  exponential = "#9C27B0"
)

# Build the interactive plot: start from an empty figure and add one
# lines+markers trace per formula left after trimming
p <- plot_ly()

for (fm in unique(results_trimmed$Formula)) {
  df_fm <- filter(results_trimmed, Formula == fm)
  trace_colour <- colors[[fm]]
  hover_text <- paste0(
    "<b>", fm, "</b><br>x = %{x}<br>y = %{y:,.0f}<extra></extra>"
  )

  p <- add_trace(
    p,
    data          = df_fm,
    x             = ~x,
    y             = ~y,
    type          = "scatter",
    mode          = "lines+markers",
    name          = fm,
    line          = list(color = trace_colour, width = 2.5),
    marker        = list(color = trace_colour, size = 7),
    hovertemplate = hover_text
  )
}

# Title, axis labels, horizontal legend above the plot, unified hover
layout(
  p,
  title = list(
    text = "<b>Formula Comparison: Linear, Quadratic, Cubic, Exponential</b>",
    font = list(size = 16)
  ),
  xaxis = list(title = "x Value"),
  yaxis = list(title = "y Value"),
  legend = list(orientation = "h", x = 0.5, xanchor = "center", y = 1.08),
  hovermode = "x unified"
)

2. Nested Simulation: Multi-Sales & Discounts

Conceptual Overview

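The function simulate_sales(n_salesperson, days) simulates daily sales data for each salesperson and applies a conditional discount to every sale: 20% for amounts of at least 800, 10% for amounts of at least 500, and 5% otherwise.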

Function Implementation

# Simulate daily sales for several salespeople and attach a discount tier.
#
# Arguments:
#   n_salesperson  Number of salespeople to simulate (single number >= 0).
#   days           Number of days simulated per salesperson (single number >= 0).
#
# Returns a data frame with one row per salesperson and day, with columns
# sales_id, day, sales_amount, discount_rate and cumulative_sales (running
# total per salesperson, rounded to 2 decimals). If either count is 0 the
# result has zero rows.
#
# Note: the seed is fixed at 42 on every call, so repeated calls return the
# same data and the session's RNG state is reset as a side effect.
simulate_sales <- function(n_salesperson, days) {
  stopifnot(
    is.numeric(n_salesperson), length(n_salesperson) == 1L, n_salesperson >= 0,
    is.numeric(days), length(days) == 1L, days >= 0
  )
  set.seed(42)

  n_salesperson <- as.integer(n_salesperson)
  days <- as.integer(days)

  # Preallocate the columns instead of growing a data frame with rbind(),
  # which copies every existing row on each iteration
  n_rows <- n_salesperson * days
  sales_id <- integer(n_rows)
  day <- integer(n_rows)
  sales_amount <- numeric(n_rows)
  discount_rate <- numeric(n_rows)
  running_total <- numeric(n_rows)

  row <- 0L

  # Nested loop: per salesperson -> per day.
  # seq_len() yields an empty sequence for 0, whereas 1:0 would iterate twice.
  for (s in seq_len(n_salesperson)) {
    cumulative_sales <- 0

    for (d in seq_len(days)) {
      row <- row + 1L

      # Random sales amount between 100 and 1000, rounded to 2 decimals
      amount <- round(runif(1, min = 100, max = 1000), 2)

      # Discount tier: 20% from 800, 10% from 500, otherwise 5%
      rate <- if (amount >= 800) {
        0.20
      } else if (amount >= 500) {
        0.10
      } else {
        0.05
      }

      cumulative_sales <- cumulative_sales + amount

      sales_id[row] <- s
      day[row] <- d
      sales_amount[row] <- amount
      discount_rate[row] <- rate
      running_total[row] <- round(cumulative_sales, 2)
    }
  }

  data.frame(
    sales_id         = sales_id,
    day              = day,
    sales_amount     = sales_amount,
    discount_rate    = discount_rate,
    cumulative_sales = running_total
  )
}
# Simulated dataset used below: 3 salespeople, 10 days each
sales_data <- simulate_sales(n_salesperson = 3, days = 10)

Summary Statistics per Salesperson

# Per-salesperson summary: total/average/max/min sale, average discount in
# percent, and the final value of the running total
sales_by_person <- group_by(sales_data, sales_id)

summary_stats <- summarise(
  sales_by_person,
  Total_Sales      = round(sum(sales_amount), 2),
  Avg_Sales        = round(mean(sales_amount), 2),
  Max_Sales        = max(sales_amount),
  Min_Sales        = min(sales_amount),
  Avg_Discount_Pct = round(mean(discount_rate) * 100, 2),
  Final_Cumulative = max(cumulative_sales),
  .groups = "drop"
)
library(reactable)
library(htmltools)

# Display names for the summary columns
summary_columns <- list(
  sales_id         = colDef(name = "Sales ID"),
  Total_Sales      = colDef(name = "Total Sales"),
  Avg_Sales        = colDef(name = "Avg Sales"),
  Max_Sales        = colDef(name = "Max Sales"),
  Min_Sales        = colDef(name = "Min Sales"),
  Avg_Discount_Pct = colDef(name = "Avg Discount (%)"),
  Final_Cumulative = colDef(name = "Final Cumulative")
)

# Light-blue bold centred header, 14px row text
summary_table_theme <- reactableTheme(
  headerStyle = list(
    background = "#E3F2FD",
    fontWeight = "bold",
    textAlign = "center"
  ),
  rowStyle = list(fontSize = "14px")
)

# Searchable, centre-aligned, non-wrapping table of the per-salesperson stats
summary_table <- reactable(
  summary_stats,
  columns = summary_columns,
  defaultColDef = colDef(align = "center"),
  defaultPageSize = 5,
  searchable = TRUE,
  resizable = TRUE,
  highlight = TRUE,
  striped = TRUE,
  bordered = TRUE,
  wrap = FALSE,
  fullWidth = TRUE,
  theme = summary_table_theme
)

htmltools::browsable(summary_table)

Visualization

3. Hierarchical Performance Classification and Categorization Analysis

Conceptual Overview

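The function categorize_performance(sales_amount) classifies each sales amount into one of 5 performance levels: Excellent (≥ 900), Very Good (≥ 700), Good (≥ 500), Average (≥ 300), and Poor (< 300).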

Function Implementation

# Classify sales amounts into five performance levels.
#
# Arguments:
#   sales_amount  Numeric vector of sales amounts.
#
# Returns a character vector of the same length with "Excellent" (>= 900),
# "Very Good" (>= 700), "Good" (>= 500), "Average" (>= 300) or "Poor"
# (< 300). Missing amounts yield NA instead of stopping the loop with
# "missing value where TRUE/FALSE needed".
categorize_performance <- function(sales_amount) {
  categories <- character(length(sales_amount))

  # Loop through each element of the vector
  for (i in seq_along(sales_amount)) {
    amount <- sales_amount[i]

    categories[i] <- if (is.na(amount)) {
      NA_character_
    } else if (amount >= 900) {
      "Excellent"
    } else if (amount >= 700) {
      "Very Good"
    } else if (amount >= 500) {
      "Good"
    } else if (amount >= 300) {
      "Average"
    } else {
      "Poor"
    }
  }

  categories
}
# Reproducible sample: 100 sales amounts drawn uniformly between 100 and
# 1000, rounded to 2 decimals
set.seed(123)
sales_vec <- round(runif(n = 100, min = 100, max = 1000), digits = 2)

Visualization

Bar Chart

Pie Chart

4. Multi-Company Dataset Generation and Workforce Analytics

Conceptual Overview

Function Implementation

# Generate a synthetic employee dataset for several companies.
#
# Arguments:
#   n_company    Number of companies (single number >= 0).
#   n_employees  Number of employees per company (single number >= 0).
#
# Returns a data frame with one row per employee and the columns company_id,
# employee_id, salary, department, performance_score, KPI_score and
# top_performer ("Yes" when KPI_score > 90, otherwise "No"). If either count
# is 0 the result has zero rows.
#
# Note: the seed is fixed at 99 on every call, so repeated calls return the
# same data and the session's RNG state is reset as a side effect.
generate_company_data <- function(n_company, n_employees) {
  stopifnot(
    is.numeric(n_company), length(n_company) == 1L, n_company >= 0,
    is.numeric(n_employees), length(n_employees) == 1L, n_employees >= 0
  )
  set.seed(99)
  departments <- c("HR", "Finance", "Marketing", "IT", "Operations")

  n_company <- as.integer(n_company)
  n_employees <- as.integer(n_employees)

  # Preallocate the columns instead of growing a data frame with rbind(),
  # which copies every existing row on each iteration
  n_rows <- n_company * n_employees
  company_id <- character(n_rows)
  employee_id <- character(n_rows)
  salary <- numeric(n_rows)
  department <- character(n_rows)
  performance_score <- numeric(n_rows)
  KPI_score <- numeric(n_rows)

  row <- 0L

  # seq_len() yields an empty sequence for 0, whereas 1:0 would iterate twice
  for (company in seq_len(n_company)) {
    for (e in seq_len(n_employees)) {
      row <- row + 1L

      # The draw order (salary, department, performance, KPI) is kept as is
      # so the seeded random stream produces the same values per employee
      salary[row]            <- round(runif(1, 4000000, 20000000), 0)
      department[row]        <- sample(departments, 1)
      performance_score[row] <- round(runif(1, 50, 100), 1)
      KPI_score[row]         <- round(runif(1, 55, 100), 1)

      company_id[row]  <- paste0("Company_", company)
      employee_id[row] <- paste0("EMP_", company, "_", e)
    }
  }

  # Flag top performers in one vectorised step; built with rep() so the
  # column stays character even when there are no rows
  top_performer <- rep("No", n_rows)
  top_performer[KPI_score > 90] <- "Yes"

  data.frame(
    company_id        = company_id,
    employee_id       = employee_id,
    salary            = salary,
    department        = department,
    performance_score = performance_score,
    KPI_score         = KPI_score,
    top_performer     = top_performer
  )
}

Summary per Company

library(dplyr)
library(reactable)
library(htmltools)

# Per-company summary: average salary, average performance, max and average
# KPI, and the number of employees flagged as top performers.
# NOTE(review): company_data is not created in the code shown here;
# presumably it comes from generate_company_data() in a chunk that is not
# displayed - confirm.
employees_by_company <- group_by(company_data, company_id)

company_summary <- summarise(
  employees_by_company,
  Avg_Salary      = round(mean(salary), 0),
  Avg_Performance = round(mean(performance_score), 2),
  Max_KPI         = max(KPI_score),
  Avg_KPI         = round(mean(KPI_score), 2),
  Top_Performers  = sum(top_performer == "Yes"),
  .groups = "drop"
)

# Interactive table of the per-company summary.
# Avg Salary is rendered as "Rp <amount>" with comma thousands separators.
htmltools::browsable(
  reactable(
    company_summary,
    bordered = TRUE,
    striped = TRUE,
    highlight = TRUE,
    resizable = TRUE,
    defaultPageSize = 5,
    fullWidth = TRUE,

    defaultColDef = colDef(
      align = "center",
      minWidth = 150
    ),

    columns = list(
      company_id = colDef(name = "Company"),
      Avg_Salary = colDef(
        name = "Avg Salary",
        # scientific = FALSE keeps round amounts in fixed notation; without
        # it format() may print e.g. 12000000 as "1.2e+07"
        cell = function(value) {
          paste0("Rp ", format(value, big.mark = ",", scientific = FALSE))
        }
      ),
      Avg_Performance = colDef(name = "Avg Performance"),
      Max_KPI = colDef(name = "Max KPI"),
      Avg_KPI = colDef(name = "Avg KPI"),
      Top_Performers = colDef(name = "Top Performers")
    ),

    theme = reactableTheme(
      headerStyle = list(
        background = "#E3F2FD",
        fontWeight = "bold",
        textAlign = "center"
      )
    )
  )
)

Visualization

Plot 1 : Average Salary per Company

Plot 2 : Average KPI per Company

Plot 3 : KPI Score Distribution

Plot 4 : Top Performers vs Non-Top Performers

5. Monte Carlo Simulation for Pi Estimation and Probability Analysis

Conceptual Overview

Function

# Estimate pi by sampling random points in the square [-1, 1] x [-1, 1].
#
# Arguments:
#   n_points  Number of random points to draw (single number >= 1;
#             fractional values are truncated).
#
# Returns a list with:
#   points          data frame of the sampled x, y and an `inside` factor
#                   ("Inside"/"Outside" the unit circle)
#   pi_estimate     4 * (points inside the circle / n_points)
#   prob_subsquare  share of points falling in [0, 0.5] x [0, 0.5]
#   n_inside        number of points inside the circle
#
# Note: the seed is fixed at 2024 on every call, so repeated calls return
# the same points and the session's RNG state is reset as a side effect.
monte_carlo_pi <- function(n_points) {
  # An estimate needs at least one point; previously n_points = 0 failed
  # inside the loop with "argument is of length zero"
  if (!is.numeric(n_points) || length(n_points) != 1L ||
      is.na(n_points) || n_points < 1) {
    stop("n_points must be a single number >= 1", call. = FALSE)
  }
  n_points <- as.integer(n_points)

  set.seed(2024)

  # Storage for the sampled points
  x <- numeric(n_points)
  y <- numeric(n_points)

  # Counters
  inside_circle_count <- 0
  subsquare_count <- 0

  # Loop iteration: draw one point at a time and update both counters
  for (i in seq_len(n_points)) {
    x[i] <- runif(1, -1, 1)
    y[i] <- runif(1, -1, 1)

    # Check inside unit circle
    if (x[i]^2 + y[i]^2 <= 1) {
      inside_circle_count <- inside_circle_count + 1
    }

    # Check inside subsquare (0 to 0.5, 0 to 0.5)
    if (x[i] >= 0 && x[i] <= 0.5 && y[i] >= 0 && y[i] <= 0.5) {
      subsquare_count <- subsquare_count + 1
    }
  }

  # Estimate pi from the share of points inside the circle
  pi_estimate <- 4 * (inside_circle_count / n_points)

  # Empirical probability of landing in the subsquare
  prob_subsquare <- subsquare_count / n_points

  # Label inside/outside for plotting
  inside_circle <- ifelse((x^2 + y^2) <= 1, "Inside", "Outside")

  list(
    points = data.frame(
      x = x,
      y = y,
      inside = factor(inside_circle, levels = c("Inside", "Outside"))
    ),
    pi_estimate = pi_estimate,
    prob_subsquare = prob_subsquare,
    n_inside = inside_circle_count
  )
}

Results Table

Visualization Plot: Points Inside & Outside the Circle

6. Advanced Data Transformation & Feature Engineering Techniques

Conceptual Overview

Normalization & Z-Score Functions

# Min-Max Normalization
#
# Rescale every numeric column of `df` to the range [0, 1] using
# (x - min) / (max - min); non-numeric columns are returned unchanged.
#
# Arguments:
#   df  A data frame.
#
# Returns a data frame with the same columns as `df`. Missing values stay
# missing. A constant column (max == min) is mapped to 0 instead of NaN,
# and a column containing only NA is left as it is.
normalize_columns <- function(df) {
  result <- df

  for (col in names(df)) {
    values <- df[[col]]

    # Skip non-numeric columns, and all-NA columns for which min()/max()
    # would warn and return Inf/-Inf
    if (!is.numeric(values) || all(is.na(values))) {
      next
    }

    min_val <- min(values, na.rm = TRUE)
    span <- max(values, na.rm = TRUE) - min_val

    result[[col]] <- if (isTRUE(span == 0)) {
      # Zero range: avoid 0 / 0 and place every value at the lower bound
      as.numeric(values - min_val)
    } else {
      (values - min_val) / span
    }
  }

  result
}

# Z-Score Standardization
#
# Standardise every numeric column of `df` as (x - mean) / sd; non-numeric
# columns are returned unchanged.
#
# Arguments:
#   df  A data frame.
#
# Returns a data frame with the same columns as `df`. Missing values stay
# missing. A constant column (sd == 0) is mapped to 0 instead of NaN.
z_score <- function(df) {
  result <- df

  for (col in names(df)) {
    values <- df[[col]]

    if (!is.numeric(values)) {
      next
    }

    centred <- values - mean(values, na.rm = TRUE)
    sigma <- sd(values, na.rm = TRUE)

    result[[col]] <- if (isTRUE(sigma == 0)) {
      # Zero spread: every value equals the mean, so avoid dividing 0 by 0
      centred
    } else {
      centred / sigma
    }
  }

  result
}

Visualization: Before vs After Transformation

Histogram: Performance Score before vs after

Boxplot: Salary before vs after

Performance Category Distribution

Salary bracket distribution

7. Mini Project: Company KPI Dashboard & Simulation

Conceptual Overview

Generate a dataset for 5 companies with 50 employees each, then perform a full analysis covering top performers, department breakdown, salary distribution, and advanced visualizations.

Generate Dataset

# Dataset for the dashboard: 5 companies x 50 employees
dashboard_data <- generate_company_data(n_company = 5, n_employees = 50)

# Assign a KPI tier to each employee with a loop:
# Bronze (< 60), Silver (< 75), Gold (< 90), Platinum (>= 90).
# NOTE(review): generate_company_data() flags top performers with KPI > 90,
# so a score of exactly 90 is Platinum but not a top performer - confirm
# this is intended.
kpi_tier <- character(nrow(dashboard_data))

for (i in seq_len(nrow(dashboard_data))) {
  score <- dashboard_data$KPI_score[i]

  if (score < 60) {
    kpi_tier[i] <- "Bronze"
  } else if (score < 75) {
    kpi_tier[i] <- "Silver"
  } else if (score < 90) {
    kpi_tier[i] <- "Gold"
  } else {
    kpi_tier[i] <- "Platinum"
  }
}

# Store the tier as a factor ordered from best to worst
dashboard_data$KPI_tier <- factor(
  kpi_tier,
  levels = c("Platinum", "Gold", "Silver", "Bronze")
)

Summary per Company

# Per-company dashboard summary: average salary, KPI and performance, plus
# counts of top performers and Platinum-tier employees
dashboard_by_company <- group_by(dashboard_data, company_id)

dash_summary <- summarise(
  dashboard_by_company,
  Avg_Salary      = round(mean(salary), 0),
  Avg_KPI         = round(mean(KPI_score), 2),
  Avg_Performance = round(mean(performance_score), 2),
  Top_Performers  = sum(top_performer == "Yes"),
  Platinum_Tier   = sum(KPI_tier == "Platinum"),
  .groups = "drop"
)

Top Performers Table

Advanced Visualizations

1. Grouped bar — KPI tier distribution per company

2. Scatter — KPI Score vs Performance Score with regression lines

3. Salary distribution per company (faceted)

4. Average KPI per department (horizontal bar)

8. Automated Report Generation

Conceptual Overview

This bonus task uses functions and loops to automatically generate a summary report for each company, including key metrics and department distribution tables.

Function

# Build the report pieces for a single company.
#
# Arguments:
#   data          Employee data frame with the columns company_id, salary,
#                 KPI_score, performance_score, top_performer, department
#                 and KPI_tier.
#   company_name  Value of company_id to report on.
#
# Returns a list with the company name, a metric/value summary table, the
# per-department and per-KPI-tier distributions, and the filtered rows.
generate_company_report <- function(data, company_name) {
  company_df <- filter(data, company_id == company_name)

  # Headline metrics, collected as a named vector and then tabulated
  metrics <- c(
    "Total Employees"     = nrow(company_df),
    "Average Salary"      = round(mean(company_df$salary), 0),
    "Average KPI"         = round(mean(company_df$KPI_score), 2),
    "Average Performance" = round(mean(company_df$performance_score), 2),
    "Top Performers"      = sum(company_df$top_performer == "Yes")
  )
  summary_tbl <- data.frame(
    Metric = names(metrics),
    Value  = unname(metrics)
  )

  # Headcount and average KPI per department
  dept_dist <- summarise(
    group_by(company_df, department),
    Employees = n(),
    Avg_KPI   = round(mean(KPI_score), 2),
    .groups = "drop"
  )

  # Headcount per KPI tier and its share of the company in percent
  tier_counts <- summarise(
    group_by(company_df, KPI_tier),
    Employees = n(),
    .groups = "drop"
  )
  tier_dist <- mutate(
    tier_counts,
    Percentage = round(Employees / sum(Employees) * 100, 1)
  )

  list(
    company     = company_name,
    summary_tbl = summary_tbl,
    dept_dist   = dept_dist,
    tier_dist   = tier_dist,
    raw_data    = company_df
  )
}

Automated Report Generation Using Loop

Automated Company Report (Interactive)


Company Report: Company_1

Summary Metrics


Department Distribution


KPI Tier Distribution


Visualization: KPI Tier Distribution


Company Report: Company_2

Summary Metrics


Department Distribution


KPI Tier Distribution


Visualization: KPI Tier Distribution


Company Report: Company_3

Summary Metrics


Department Distribution


KPI Tier Distribution


Visualization: KPI Tier Distribution


Company Report: Company_4

Summary Metrics


Department Distribution


KPI Tier Distribution


Visualization: KPI Tier Distribution


Company Report: Company_5

Summary Metrics


Department Distribution


KPI Tier Distribution


Visualization: KPI Tier Distribution


Export to CSV

# Export the full employee-level dataset
write.csv(dashboard_data, file = "company_full_data.csv", row.names = FALSE)

# Export one summary row per company
company_summary_export <- summarise(
  group_by(dashboard_data, company_id),
  Total_Employees = n(),
  Avg_Salary      = round(mean(salary), 0),
  Avg_KPI         = round(mean(KPI_score), 2),
  Avg_Performance = round(mean(performance_score), 2),
  Top_Performers  = sum(top_performer == "Yes"),
  .groups = "drop"
)

write.csv(
  company_summary_export,
  file = "company_summary.csv",
  row.names = FALSE
)

# Export headcount and average KPI per company and department
dept_summary_export <- summarise(
  group_by(dashboard_data, company_id, department),
  Employees = n(),
  Avg_KPI   = round(mean(KPI_score), 2),
  .groups = "drop"
)

write.csv(
  dept_summary_export,
  file = "company_department_summary.csv",
  row.names = FALSE
)

# Overview of the exported CSV files: name, status label, and the working
# directory at the time this table is built.
# The status label is a fixed "Exported" for every file (the previous
# "EXported" typo is corrected); it is not derived from a file check.
exported_files <- c(
  "company_full_data.csv",
  "company_summary.csv",
  "company_department_summary.csv"
)

export_info <- data.frame(
  File = exported_files,
  Status = rep("Exported", length(exported_files)),
  Location = rep(getwd(), length(exported_files))
)

# Column labels and styling: bold file names, bold green status text
export_columns <- list(
  File = colDef(name = "File Name", style = list(fontWeight = "bold")),
  Status = colDef(
    name = "Status",
    style = list(color = "green", fontWeight = "bold")
  ),
  Location = colDef(name = "Saved Location")
)

# Light-blue bold centred header, 14px row text
export_table_theme <- reactableTheme(
  headerStyle = list(
    background = "#E3F2FD",
    fontWeight = "bold",
    textAlign = "center"
  ),
  rowStyle = list(fontSize = "14px")
)

# Table listing the exported files
export_table <- reactable(
  export_info,
  columns = export_columns,
  defaultColDef = colDef(align = "center", minWidth = 250),
  highlight = TRUE,
  striped = TRUE,
  bordered = TRUE,
  fullWidth = TRUE,
  theme = export_table_theme
)

htmltools::browsable(export_table)

Conclusion

This project demonstrates the use of functions, loops, and data transformation techniques to build dynamic simulations and analytical models. From mathematical computations to sales simulations and company KPI dashboards, each section highlights how structured programming and visualization can turn raw data into meaningful insights. Overall, it shows the power of combining data processing, statistical methods, and interactive visualization to support better decision-making.