Assignment Week 5 ~ Advanced Practicum: Functions & Loops + Data Science
Chelsea Tesalonika
Patricia Hutajulu
Bakti Siregar, M.Sc., CDS
Major: Data Science at ITSB
Data Science Programming
1. Multi-Formula Mathematical Modeling and Function Implementation
Conceptual Overview
| Formula | Expression |
|---|---|
| Linear | y = x |
| Quadratic | y = x² |
| Cubic | y = x³ |
| Exponential | y = eˣ |
Function Implementation
# Evaluate one of four elementary formulas on a numeric input.
#
# Args:
#   x:       Numeric vector of input values.
#   formula: Single string naming the formula to apply: "linear",
#            "quadratic", "cubic", or "exponential".
#
# Returns:
#   The transformed values, one per element of `x`.
#
# Raises:
#   An error listing the valid choices when `formula` is not exactly one of
#   the supported names.
compute_formula <- function(x, formula) {
  valid_formulas <- c("linear", "quadratic", "cubic", "exponential")

  # `if` needs a single TRUE/FALSE, so reject anything that is not one
  # string up front instead of failing on a length > 1 condition.
  is_valid <- is.character(formula) &&
    length(formula) == 1L &&
    formula %in% valid_formulas
  if (!is_valid) {
    stop(paste(
      "Invalid formula! Choose:",
      paste(valid_formulas, collapse = ", ")
    ))
  }

  switch(formula,
    "linear" = x,
    "quadratic" = x^2,
    "cubic" = x^3,
    "exponential" = exp(x)
  )
}
library(reactable)
library(htmltools)
# Example output table: one sample input per formula.
example_df <- data.frame(
  Input = c(5, 2, 3, 1),
  Formula = c("quadratic", "exponential", "cubic", "linear")
)

# Pair each input with the formula on the same row and evaluate it.
example_df$Output <- mapply(
  compute_formula,
  example_df$Input,
  example_df$Formula,
  USE.NAMES = FALSE
)

# Render the examples as an interactive, searchable table.
htmltools::browsable(
  reactable(
    example_df,
    defaultColDef = colDef(align = "center"),
    defaultPageSize = 5,
    searchable = TRUE,
    resizable = TRUE,
    highlight = TRUE,
    striped = TRUE,
    bordered = TRUE,
    fullWidth = TRUE,
    wrap = FALSE,
    theme = reactableTheme(
      headerStyle = list(background = "#E3F2FD", fontWeight = "bold"),
      rowStyle = list(fontSize = "14px")
    )
  )
)
# Results
# Evaluate every formula on x = 1..20 and collect one column per formula.
x_values <- 1:20
formulas <- c("linear", "quadratic", "cubic", "exponential")
results <- data.frame(x = x_values)
for (f in formulas) {
  # vapply() pins the result to one double per input, so the column type
  # cannot silently change the way it can with sapply().
  results[[f]] <- vapply(x_values, compute_formula, numeric(1), formula = f)
}

# Paginated interactive table of the computed values.
htmltools::browsable(
  reactable(
    results,
    searchable = TRUE,
    pagination = TRUE,
    defaultPageSize = 5,
    showPageSizeOptions = TRUE,
    bordered = TRUE,
    striped = TRUE,
    highlight = TRUE,
    fullWidth = TRUE,
    resizable = TRUE,
    defaultColDef = colDef(align = "center"),
    theme = reactableTheme(
      headerStyle = list(
        background = "#DCEEFF",
        fontWeight = "bold",
        textAlign = "center"
      ),
      rowStyle = list(fontSize = "14px"),
      stripedColor = "#F7FBFF"
    )
  )
)
# Visualization
library(tidyverse)
library(plotly)
# Build the comparison data: one column per formula for x = 1..20.
x <- 1:20
results <- tibble(
  x = x,
  linear = x,
  quadratic = x^2,
  cubic = x^3,
  exponential = exp(x)
)

# Reshape to long format: one row per (x, formula) pair.
results_long <- pivot_longer(
  results,
  cols = -x,
  names_to = "Formula",
  values_to = "y"
)

# Keep only points with y <= 1000 so the slower-growing curves stay readable.
results_trimmed <- filter(results_long, y <= 1000)

# One colour per formula.
colors <- c(
  "linear" = "#2196F3",
  "quadratic" = "#4CAF50",
  "cubic" = "#FF5722",
  "exponential" = "#9C27B0"
)

# Build the interactive plot by adding one trace per formula.
p <- plot_ly()
for (fm in unique(results_trimmed$Formula)) {
  df_fm <- filter(results_trimmed, Formula == fm)
  p <- add_trace(
    p,
    data = df_fm,
    x = ~x,
    y = ~y,
    type = "scatter",
    mode = "lines+markers",
    name = fm,
    line = list(color = colors[[fm]], width = 2.5),
    marker = list(color = colors[[fm]], size = 7),
    hovertemplate = paste0("<b>", fm, "</b><br>x = %{x}<br>y = %{y:,.0f}<extra></extra>")
  )
}

# Titles, axis labels, and a horizontal legend centred above the plot.
layout(
  p,
  title = list(
    text = "<b>Formula Comparison: Linear, Quadratic, Cubic, Exponential</b>",
    font = list(size = 16)
  ),
  xaxis = list(title = "x Value"),
  yaxis = list(title = "y Value"),
  legend = list(orientation = "h", x = 0.5, xanchor = "center", y = 1.08),
  hovermode = "x unified"
)
# 2. Nested Simulation: Multi-Sales & Discounts
Conceptual Overview
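The function simulates daily sales data for each salesperson and applies a conditional discount based on the daily sales amount: 20% for sales of 800 or more, 10% for sales of at least 500 but below 800, and 5% otherwise. It also tracks each salesperson's cumulative sales.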
Function Implementation
# Simulate daily sales for several salespeople and attach a discount rate.
#
# Args:
#   n_salesperson: Number of salespeople to simulate (>= 0).
#   days:          Number of days simulated per salesperson (>= 0).
#
# Returns:
#   A data frame with one row per salesperson and day, with columns
#   sales_id, day, sales_amount, discount_rate and cumulative_sales
#   (running total per salesperson). It has zero rows when either count is 0.
#
# Note: the RNG seed is fixed to 42 inside the function, so every call
# returns the same data and resets the global random stream.
simulate_sales <- function(n_salesperson, days) {
  stopifnot(n_salesperson >= 0, days >= 0)
  set.seed(42)

  # Preallocate the columns; growing a data frame with rbind() inside the
  # loop copies all previous rows on every iteration.
  total_rows <- n_salesperson * days
  sales_id <- integer(total_rows)
  day <- integer(total_rows)
  sales_amount <- numeric(total_rows)
  discount_rate <- numeric(total_rows)
  cumulative <- numeric(total_rows)

  row_idx <- 0L
  # Nested loop: per salesperson -> per day. seq_len() yields an empty
  # sequence for a count of 0, whereas 1:0 would iterate over c(1, 0).
  for (s in seq_len(n_salesperson)) {
    cumulative_sales <- 0
    for (d in seq_len(days)) {
      row_idx <- row_idx + 1L

      # One uniform draw per (salesperson, day), in the same order as before.
      amount <- round(runif(1, min = 100, max = 1000), 2)

      # Conditional discount based on the sales amount.
      rate <- if (amount >= 800) {
        0.20
      } else if (amount >= 500) {
        0.10
      } else {
        0.05
      }

      cumulative_sales <- cumulative_sales + amount

      sales_id[row_idx] <- s
      day[row_idx] <- d
      sales_amount[row_idx] <- amount
      discount_rate[row_idx] <- rate
      cumulative[row_idx] <- round(cumulative_sales, 2)
    }
  }

  data.frame(
    sales_id = sales_id,
    day = day,
    sales_amount = sales_amount,
    discount_rate = discount_rate,
    cumulative_sales = cumulative
  )
}
# Simulate 3 salespeople over 10 days.
sales_data <- simulate_sales(n_salesperson = 3, days = 10)
# Summary Statistics per Salesperson

# Aggregate totals, averages, extremes and the final running total
# for each salesperson.
summary_stats <- summarise(
  group_by(sales_data, sales_id),
  Total_Sales = round(sum(sales_amount), 2),
  Avg_Sales = round(mean(sales_amount), 2),
  Max_Sales = max(sales_amount),
  Min_Sales = min(sales_amount),
  Avg_Discount_Pct = round(mean(discount_rate) * 100, 2),
  Final_Cumulative = max(cumulative_sales),
  .groups = "drop"
)
library(reactable)
library(htmltools)

# Interactive table with human-readable column headers.
htmltools::browsable(
  reactable(
    summary_stats,
    columns = list(
      sales_id = colDef(name = "Sales ID"),
      Total_Sales = colDef(name = "Total Sales"),
      Avg_Sales = colDef(name = "Avg Sales"),
      Max_Sales = colDef(name = "Max Sales"),
      Min_Sales = colDef(name = "Min Sales"),
      Avg_Discount_Pct = colDef(name = "Avg Discount (%)"),
      Final_Cumulative = colDef(name = "Final Cumulative")
    ),
    defaultColDef = colDef(align = "center"),
    defaultPageSize = 5,
    searchable = TRUE,
    resizable = TRUE,
    highlight = TRUE,
    striped = TRUE,
    bordered = TRUE,
    fullWidth = TRUE,
    wrap = FALSE,
    theme = reactableTheme(
      headerStyle = list(
        background = "#E3F2FD",
        fontWeight = "bold",
        textAlign = "center"
      ),
      rowStyle = list(fontSize = "14px")
    )
  )
)
# Visualization
3. Hierarchical Performance Classification and Categorization Analysis
Conceptual Overview
The function categorize_performance(sales_amount)
classifies sales into 5 performance levels: Excellent (≥ 900), Very Good (≥ 700), Good (≥ 500), Average (≥ 300), and Poor (< 300).
Function Implementation
# Classify sales amounts into five performance levels.
#
# Args:
#   sales_amount: Numeric vector of sales amounts.
#
# Returns:
#   A character vector the same length as `sales_amount` with the labels
#   "Excellent" (>= 900), "Very Good" (>= 700), "Good" (>= 500),
#   "Average" (>= 300) or "Poor" (< 300). Missing amounts are returned as NA.
categorize_performance <- function(sales_amount) {
  categories <- character(length(sales_amount))

  # Loop through each element of the vector.
  for (i in seq_along(sales_amount)) {
    amount <- sales_amount[i]
    categories[i] <- if (is.na(amount)) {
      # `if (NA >= 900)` would stop with an error, so label missing values
      # explicitly instead of failing the whole vector.
      NA_character_
    } else if (amount >= 900) {
      "Excellent"
    } else if (amount >= 700) {
      "Very Good"
    } else if (amount >= 500) {
      "Good"
    } else if (amount >= 300) {
      "Average"
    } else {
      "Poor"
    }
  }

  categories
}
# Draw 100 reproducible sales amounts between 100 and 1000, rounded to cents.
set.seed(123)
sales_vec <- round(runif(n = 100, min = 100, max = 1000), digits = 2)
# Visualization
Bar Chart
Pie Chart
4. Multi-Company Dataset Generation and Workforce Analytics
Conceptual Overview
Function Implementation
# Generate a synthetic employee dataset for several companies.
#
# Args:
#   n_company:   Number of companies to generate (>= 0).
#   n_employees: Number of employees per company (>= 0).
#
# Returns:
#   A data frame with one row per employee and the columns company_id,
#   employee_id, salary, department, performance_score, KPI_score and
#   top_performer ("Yes" when KPI_score > 90, otherwise "No"). It has zero
#   rows when either count is 0.
#
# Note: the RNG seed is fixed to 99 inside the function, so every call
# returns the same data and resets the global random stream.
generate_company_data <- function(n_company, n_employees) {
  stopifnot(n_company >= 0, n_employees >= 0)
  set.seed(99)
  departments <- c("HR", "Finance", "Marketing", "IT", "Operations")

  # Preallocate the columns; growing a data frame with rbind() inside the
  # loop copies all previous rows on every iteration.
  total_rows <- n_company * n_employees
  company_id <- character(total_rows)
  employee_id <- character(total_rows)
  salary <- numeric(total_rows)
  department <- character(total_rows)
  performance_score <- numeric(total_rows)
  KPI_score <- numeric(total_rows)

  row_idx <- 0L
  # seq_len() yields an empty sequence for a count of 0 (1:0 would not).
  for (company_idx in seq_len(n_company)) {
    for (employee_idx in seq_len(n_employees)) {
      row_idx <- row_idx + 1L

      # Keep the draw order (salary, department, performance, KPI) so the
      # fixed seed reproduces the same values as before.
      salary[row_idx] <- round(runif(1, 4000000, 20000000), 0)
      department[row_idx] <- sample(departments, 1)
      performance_score[row_idx] <- round(runif(1, 50, 100), 1)
      KPI_score[row_idx] <- round(runif(1, 55, 100), 1)

      company_id[row_idx] <- paste0("Company_", company_idx)
      employee_id[row_idx] <- paste0("EMP_", company_idx, "_", employee_idx)
    }
  }

  data.frame(
    company_id = company_id,
    employee_id = employee_id,
    salary = salary,
    department = department,
    performance_score = performance_score,
    KPI_score = KPI_score,
    # Vectorised over the whole column instead of ifelse() on each scalar.
    top_performer = ifelse(KPI_score > 90, "Yes", "No")
  )
}
# Summary per Company
library(dplyr)
library(reactable)
library(htmltools)
# Per-company averages, maximum KPI and number of top performers.
# NOTE(review): `company_data` is not defined in the visible part of this
# file; presumably it comes from generate_company_data() - confirm.
company_summary <- company_data %>%
  group_by(company_id) %>%
  summarise(
    Avg_Salary = round(mean(salary), 0),
    Avg_Performance = round(mean(performance_score), 2),
    Max_KPI = max(KPI_score),
    Avg_KPI = round(mean(KPI_score), 2),
    Top_Performers = sum(top_performer == "Yes"),
    .groups = "drop"
  )

# Interactive summary table; salaries are shown as "Rp" amounts.
htmltools::browsable(
  reactable(
    company_summary,
    bordered = TRUE,
    striped = TRUE,
    highlight = TRUE,
    resizable = TRUE,
    defaultPageSize = 5,
    fullWidth = TRUE,
    defaultColDef = colDef(
      align = "center",
      minWidth = 150
    ),
    columns = list(
      company_id = colDef(name = "Company"),
      Avg_Salary = colDef(
        name = "Avg Salary",
        # scientific = FALSE stops round amounts such as 12000000 from
        # being printed as "1.2e+07" instead of "12,000,000".
        cell = function(value) {
          paste0("Rp ", format(value, big.mark = ",", scientific = FALSE))
        }
      ),
      Avg_Performance = colDef(name = "Avg Performance"),
      Max_KPI = colDef(name = "Max KPI"),
      Avg_KPI = colDef(name = "Avg KPI"),
      Top_Performers = colDef(name = "Top Performers")
    ),
    theme = reactableTheme(
      headerStyle = list(
        background = "#E3F2FD",
        fontWeight = "bold",
        textAlign = "center"
      )
    )
  )
)
# Visualization
Plot 1 : Average Salary per Company
Plot 2 : Average KPI per Company
Plot 3 : KPI Score Distribution
Plot 4 : Top Performers vs Non-Top Performers
5. Monte Carlo Simulation for Pi Estimation and Probability Analysis
Conceptual Overview
Function
# Estimate pi by sampling uniform random points in the square
# [-1, 1] x [-1, 1] and counting how many fall inside the unit circle.
#
# Args:
#   n_points: Number of random points to draw; a single number >= 1.
#
# Returns:
#   A list with:
#     points         - data frame with x, y and an `inside` factor
#                      ("Inside" / "Outside" the unit circle),
#     pi_estimate    - 4 * (points inside the circle / n_points),
#     prob_subsquare - share of points in the square [0, 0.5] x [0, 0.5],
#     n_inside       - number of points inside the circle.
#
# Raises:
#   An error when `n_points` is not a single number >= 1.
#
# Note: the RNG seed is fixed to 2024 inside the function, so every call
# with the same `n_points` returns the same points.
monte_carlo_pi <- function(n_points) {
  is_valid_count <- is.numeric(n_points) &&
    length(n_points) == 1L &&
    !is.na(n_points) &&
    n_points >= 1
  if (!is_valid_count) {
    stop("n_points must be a single number >= 1", call. = FALSE)
  }

  set.seed(2024)

  # Storage for the sampled points and their circle membership.
  x <- numeric(n_points)
  y <- numeric(n_points)
  in_circle <- logical(n_points)

  # Counters
  inside_circle_count <- 0
  subsquare_count <- 0

  # Loop iteration: draw x then y for each point (order matters for the
  # fixed seed), then update both counters.
  for (i in seq_len(n_points)) {
    x[i] <- runif(1, -1, 1)
    y[i] <- runif(1, -1, 1)

    # Check inside circle
    in_circle[i] <- x[i]^2 + y[i]^2 <= 1
    if (in_circle[i]) {
      inside_circle_count <- inside_circle_count + 1
    }

    # Check inside subsquare (0 to 0.5, 0 to 0.5)
    if (x[i] >= 0 && x[i] <= 0.5 && y[i] >= 0 && y[i] <= 0.5) {
      subsquare_count <- subsquare_count + 1
    }
  }

  # The circle covers pi / 4 of the square, hence the factor 4.
  pi_estimate <- 4 * (inside_circle_count / n_points)

  # Probability of landing in the subsquare
  prob_subsquare <- subsquare_count / n_points

  # Label inside/outside for plotting, reusing the test done in the loop.
  inside_circle <- ifelse(in_circle, "Inside", "Outside")

  list(
    points = data.frame(
      x = x,
      y = y,
      inside = factor(inside_circle, levels = c("Inside", "Outside"))
    ),
    pi_estimate = pi_estimate,
    prob_subsquare = prob_subsquare,
    n_inside = inside_circle_count
  )
}
# Results Table
Visualization Plot: Points Inside & Outside the Circle
6. Advanced Data Transformation & Feature Engineering Techniques
Conceptual Overview
Normalization & Z-Score Functions
# Min-Max Normalization
#
# Rescale every numeric column of a data frame to the range [0, 1] using
# (value - min) / (max - min). Non-numeric columns are returned unchanged.
#
# Args:
#   df: A data frame.
#
# Returns:
#   A copy of `df` with its numeric columns rescaled. Missing values stay
#   missing. A column whose non-missing values are all equal becomes all
#   zeros, and a column that is entirely NA is left as it is.
normalize_columns <- function(df) {
  result <- df
  for (col in names(df)) {
    values <- df[[col]]
    # Nothing to rescale for non-numeric columns or columns with no data;
    # min()/max() on an all-NA column would only produce warnings.
    if (!is.numeric(values) || all(is.na(values))) {
      next
    }

    min_val <- min(values, na.rm = TRUE)
    span <- max(values, na.rm = TRUE) - min_val

    result[[col]] <- if (isTRUE(span == 0)) {
      # Constant column: dividing by a zero range would give NaN.
      as.numeric(values - min_val)
    } else {
      (values - min_val) / span
    }
  }
  result
}
# Z-Score Standardization
#
# Standardise every numeric column of a data frame to mean 0 and standard
# deviation 1 using (value - mean) / sd. Non-numeric columns are returned
# unchanged.
#
# Args:
#   df: A data frame.
#
# Returns:
#   A copy of `df` with its numeric columns standardised. Missing values
#   stay missing. When a column's standard deviation is zero or undefined
#   (constant column, or fewer than two non-missing values) the column is
#   only centred, so its non-missing values become zeros.
z_score <- function(df) {
  result <- df
  for (col in names(df)) {
    values <- df[[col]]
    if (!is.numeric(values)) {
      next
    }

    mu <- mean(values, na.rm = TRUE)
    sigma <- sd(values, na.rm = TRUE)

    result[[col]] <- if (is.na(sigma) || sigma == 0) {
      # Dividing by a zero or NA spread would turn the column into NaN/NA.
      values - mu
    } else {
      (values - mu) / sigma
    }
  }
  result
}
# Visualization: Before vs After Transformation
Histogram: Performance Score before vs after
Boxplot: Salary before vs after
Performance Category Distribution
Salary bracket distribution
7. Mini Project: Company KPI Dashboard & Simulation
Conceptual Overview
Generate a dataset for 5 companies with 50 employees each, then perform a full analysis covering top performers, department breakdown, salary distribution, and advanced visualizations.
Generate Dataset
# Generate 5 companies x 50 employees
dashboard_data <- generate_company_data(n_company = 5, n_employees = 50)

# Assign a KPI tier to every employee with an explicit loop: start at the
# lowest tier and promote it each time a higher threshold is reached.
# NOTE(review): "Platinum" uses KPI_score >= 90 while top_performer in
# generate_company_data() uses KPI_score > 90 - confirm that is intended.
kpi_tier <- character(nrow(dashboard_data))
for (i in seq_len(nrow(dashboard_data))) {
  kpi_tier[i] <- "Bronze"
  if (dashboard_data$KPI_score[i] >= 60) {
    kpi_tier[i] <- "Silver"
  }
  if (dashboard_data$KPI_score[i] >= 75) {
    kpi_tier[i] <- "Gold"
  }
  if (dashboard_data$KPI_score[i] >= 90) {
    kpi_tier[i] <- "Platinum"
  }
}

# Store the tier as a factor ordered from best to worst.
dashboard_data$KPI_tier <- factor(
  kpi_tier,
  levels = c("Platinum", "Gold", "Silver", "Bronze")
)
# Summary per Company
# Per-company averages plus counts of top performers and Platinum-tier
# employees.
dash_summary <- summarise(
  group_by(dashboard_data, company_id),
  Avg_Salary = round(mean(salary), 0),
  Avg_KPI = round(mean(KPI_score), 2),
  Avg_Performance = round(mean(performance_score), 2),
  Top_Performers = sum(top_performer == "Yes"),
  Platinum_Tier = sum(KPI_tier == "Platinum"),
  .groups = "drop"
)
# Top Performers Table
Advanced Visualizations
1. Grouped bar — KPI tier distribution per company
2. Scatter — KPI Score vs Performance Score with regression lines
3. Salary distribution per company (faceted)
4. Average KPI per department (horizontal bar)
8. Automated Report Generation
Conceptual Overview
This bonus task uses functions and loops to automatically generate a summary report for each company, including key metrics and department distribution tables.
Function
# Build the report components for a single company.
#
# Args:
#   data:         Data frame with the columns company_id, salary, KPI_score,
#                 performance_score, top_performer, department and KPI_tier.
#   company_name: Value of company_id identifying the company to report on.
#
# Returns:
#   A list with:
#     company     - the requested company name,
#     summary_tbl - data frame of headline metrics (Metric, Value),
#     dept_dist   - employees and average KPI per department,
#     tier_dist   - employees and percentage per KPI tier,
#     raw_data    - the rows of `data` belonging to the company.
#
# A warning is raised when no rows match `company_name`; the averages in
# `summary_tbl` are NaN in that case.
generate_company_report <- function(data, company_name) {
  # .data / .env make the comparison explicit: the column comes from the
  # data frame and the name from the function argument, even if `data`
  # happens to contain a column called `company_name`.
  company_df <- data %>%
    filter(.data$company_id == .env$company_name)

  if (nrow(company_df) == 0) {
    warning("No rows found for company: ", company_name, call. = FALSE)
  }

  # Summary metrics
  summary_tbl <- data.frame(
    Metric = c(
      "Total Employees", "Average Salary", "Average KPI",
      "Average Performance", "Top Performers"
    ),
    Value = c(
      nrow(company_df),
      round(mean(company_df$salary), 0),
      round(mean(company_df$KPI_score), 2),
      round(mean(company_df$performance_score), 2),
      sum(company_df$top_performer == "Yes")
    )
  )

  # Department distribution
  dept_dist <- company_df %>%
    group_by(department) %>%
    summarise(
      Employees = n(),
      Avg_KPI = round(mean(KPI_score), 2),
      .groups = "drop"
    )

  # KPI tier distribution, with each tier's share of the company headcount
  tier_dist <- company_df %>%
    group_by(KPI_tier) %>%
    summarise(Employees = n(), .groups = "drop") %>%
    mutate(Percentage = round(Employees / sum(Employees) * 100, 1))

  list(
    company = company_name,
    summary_tbl = summary_tbl,
    dept_dist = dept_dist,
    tier_dist = tier_dist,
    raw_data = company_df
  )
}
# Automated Report Generation Using Loop
Automated Company Report (Interactive)
Company Report: Company_1
Summary Metrics
Department Distribution
KPI Tier Distribution
Visualization: KPI Tier Distribution
Company Report: Company_2
Summary Metrics
Department Distribution
KPI Tier Distribution
Visualization: KPI Tier Distribution
Company Report: Company_3
Summary Metrics
Department Distribution
KPI Tier Distribution
Visualization: KPI Tier Distribution
Company Report: Company_4
Summary Metrics
Department Distribution
KPI Tier Distribution
Visualization: KPI Tier Distribution
Company Report: Company_5
Summary Metrics
Department Distribution
KPI Tier Distribution
Visualization: KPI Tier Distribution
Export to CSV
# Export the full employee-level dataset.
write.csv(dashboard_data, "company_full_data.csv", row.names = FALSE)

# Export the per-company summary.
company_summary_export <- dashboard_data %>%
  group_by(company_id) %>%
  summarise(
    Total_Employees = n(),
    Avg_Salary = round(mean(salary), 0),
    Avg_KPI = round(mean(KPI_score), 2),
    Avg_Performance = round(mean(performance_score), 2),
    Top_Performers = sum(top_performer == "Yes"),
    .groups = "drop"
  )
write.csv(company_summary_export, "company_summary.csv", row.names = FALSE)

# Export department summary
dept_summary_export <- dashboard_data %>%
  group_by(company_id, department) %>%
  summarise(
    Employees = n(),
    Avg_KPI = round(mean(KPI_score), 2),
    .groups = "drop"
  )
write.csv(
  dept_summary_export,
  "company_department_summary.csv",
  row.names = FALSE
)

# Overview of the exported files; all three are written to the current
# working directory.
export_info <- data.frame(
  File = c(
    "company_full_data.csv",
    "company_summary.csv",
    "company_department_summary.csv"
  ),
  # Fixed the "EXported" typo so all three statuses read the same.
  Status = c("Exported", "Exported", "Exported"),
  Location = rep(getwd(), 3)
)

htmltools::browsable(
  reactable(
    export_info,
    bordered = TRUE,
    striped = TRUE,
    highlight = TRUE,
    fullWidth = TRUE,
    defaultColDef = colDef(align = "center", minWidth = 250),
    columns = list(
      File = colDef(name = "File Name", style = list(fontWeight = "bold")),
      Status = colDef(
        name = "Status",
        style = list(color = "green", fontWeight = "bold")
      ),
      Location = colDef(name = "Saved Location")
    ),
    theme = reactableTheme(
      headerStyle = list(
        background = "#E3F2FD",
        fontWeight = "bold",
        textAlign = "center"
      ),
      rowStyle = list(fontSize = "14px")
    )
  )
)
# Conclusion
This project demonstrates the use of functions, loops, and data transformation techniques to build dynamic simulations and analytical models. From mathematical computations to sales simulations and company KPI dashboards, each section highlights how structured programming and visualization can turn raw data into meaningful insights. Overall, it shows the power of combining data processing, statistical methods, and interactive visualization to support better decision-making.