Task 2: Nested Simulation — Multi-Sales &
Discounts
Task 2: The function
simulate_sales(n_salesperson, days) simulates daily sales
data using nested loops. A nested get_discount() function
applies conditional discounts based on the sales
amount. Outputs include cumulative sales per salesperson and a summary
statistics table.
# Simulate daily sales for several salespeople using nested loops.
#
# For every salesperson and day, one sales amount is drawn uniformly from
# [500, 6000] (rounded to 2 decimals), a tiered discount rate is looked up,
# and the salesperson's running total is updated.
#
# Arguments:
#   n_salesperson  Number of salespeople (non-negative whole number).
#   days           Number of days per salesperson (non-negative whole number).
#   seed           Value passed to set.seed() for reproducibility. The default
#                  (42) reproduces the original output; NULL leaves the RNG
#                  state untouched.
#
# Returns a data frame with one row per salesperson-day and the columns
# sales_id, day, sales_amount, discount_rate and cumulative. When either
# count is 0 the result has zero rows but the same columns.
simulate_sales <- function(n_salesperson, days, seed = 42) {
  stopifnot(
    is.numeric(n_salesperson), length(n_salesperson) == 1,
    n_salesperson >= 0, n_salesperson %% 1 == 0,
    is.numeric(days), length(days) == 1,
    days >= 0, days %% 1 == 0
  )
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Nested function: determine discount based on sales amount
  get_discount <- function(amount) {
    if (amount >= 5000) {
      0.20 # 20% discount
    } else if (amount >= 3000) {
      0.15 # 15% discount
    } else if (amount >= 1000) {
      0.10 # 10% discount
    } else {
      0.05 # 5% discount
    }
  }

  # Preallocate the result columns: growing a data frame with rbind() inside
  # the loop copies it on every iteration.
  n_rows <- n_salesperson * days
  sales_id <- integer(n_rows)
  day <- integer(n_rows)
  sales_amount <- numeric(n_rows)
  discount_rate <- numeric(n_rows)
  cumulative <- numeric(n_rows)

  row <- 0L
  # Outer loop: per salesperson (seq_len() is empty for 0, unlike 1:0)
  for (sp in seq_len(n_salesperson)) {
    cumulative_sales <- 0
    # Inner loop: per day
    for (d in seq_len(days)) {
      row <- row + 1L
      amount <- round(runif(1, 500, 6000), 2)
      cumulative_sales <- cumulative_sales + amount

      sales_id[row] <- sp
      day[row] <- d
      sales_amount[row] <- amount
      discount_rate[row] <- get_discount(amount)
      cumulative[row] <- round(cumulative_sales, 2)
    }
  }

  data.frame(
    sales_id = sales_id,
    day = day,
    sales_amount = sales_amount,
    discount_rate = discount_rate,
    cumulative = cumulative
  )
}
# Run the simulation for 5 salespeople over 10 days, then render the first
# ten rows as a table.
sales_data <- simulate_sales(n_salesperson = 5, days = 10)
sales_data %>%
  head(10) %>%
  knitr::kable(
    digits = 2,
    caption = "Sales Data Sample (first 10 rows)"
  )
Sales Data Sample (first 10 rows)
| 1 |
1 |
5531.43 |
0.20 |
5531.43 |
| 1 |
2 |
5653.91 |
0.20 |
11185.34 |
| 1 |
3 |
2073.77 |
0.10 |
13259.11 |
| 1 |
4 |
5067.46 |
0.20 |
18326.57 |
| 1 |
5 |
4029.60 |
0.15 |
22356.17 |
| 1 |
6 |
3355.03 |
0.15 |
25711.20 |
| 1 |
7 |
4551.24 |
0.15 |
30262.44 |
| 1 |
8 |
1240.67 |
0.10 |
31503.11 |
| 1 |
9 |
4113.46 |
0.15 |
35616.57 |
| 1 |
10 |
4377.86 |
0.15 |
39994.43 |
# One summary row per salesperson: total and mean sales, mean discount rate,
# and the largest running total reached.
summary_sales <- summarise(
  group_by(sales_data, sales_id),
  Total_Sales = sum(sales_amount),
  Avg_Sales = round(mean(sales_amount), 2),
  Avg_Discount = round(mean(discount_rate), 3),
  Max_Cumulative = max(cumulative),
  .groups = "drop"
)
knitr::kable(
  summary_sales,
  caption = "Summary Statistics per Salesperson",
  digits = 2
)
Summary Statistics per Salesperson
| 1 |
39994.43 |
3999.44 |
0.16 |
39994.43 |
| 2 |
37451.69 |
3745.17 |
0.16 |
37451.69 |
| 3 |
38846.26 |
3884.63 |
0.15 |
38846.26 |
| 4 |
33556.92 |
3355.69 |
0.13 |
33556.92 |
| 5 |
39832.42 |
3983.24 |
0.14 |
39832.42 |
# Line chart of each salesperson's running sales total by day.
# NOTE(review): recent ggplot2 releases deprecate `size` for line geoms in
# favour of `linewidth` — confirm the installed version before switching.
ggplot(
  data = sales_data,
  mapping = aes(
    x = day,
    y = cumulative,
    group = sales_id,
    color = factor(sales_id)
  )
) +
  geom_line(size = 1.4) +
  geom_point(size = 2.8) +
  labs(
    title = "Cumulative Sales per Salesperson",
    subtitle = "Over 10 Working Days",
    x = "Day",
    y = "Cumulative Sales ($)",
    color = "Salesperson ID"
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "top",
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", color = "#2c3e50", size = 15)
  )

Task 4: Multi-Company Dataset Simulation
Task 4: The function
generate_company_data(n_company, n_employees) uses nested
loops to build a multi-company HR dataset. Conditional logic flags
top performers (KPI > 90). Output includes a summary
table and comparative plots.
# Build a simulated multi-company HR dataset using nested loops.
#
# For every company and employee, draws a salary in [3000, 15000], a random
# department, a performance score in [50, 100] and a KPI score in [60, 100].
#
# Arguments:
#   n_company    Number of companies (non-negative whole number).
#   n_employees  Employees per company (non-negative whole number).
#   seed         Value passed to set.seed() for reproducibility. The default
#                (123) reproduces the original output; NULL leaves the RNG
#                state untouched.
#
# Returns a data frame with one row per employee and the columns company_id,
# employee_id, salary, department, performance_score and KPI_score. When
# either count is 0 the result has zero rows but the same columns.
generate_company_data <- function(n_company, n_employees, seed = 123) {
  stopifnot(
    is.numeric(n_company), length(n_company) == 1,
    n_company >= 0, n_company %% 1 == 0,
    is.numeric(n_employees), length(n_employees) == 1,
    n_employees >= 0, n_employees %% 1 == 0
  )
  if (!is.null(seed)) {
    set.seed(seed)
  }
  departments <- c("HR", "Finance", "Marketing", "IT", "Operations")

  # Preallocate the result columns: growing a data frame with rbind() inside
  # the loop copies it on every iteration.
  n_rows <- n_company * n_employees
  company_id <- character(n_rows)
  employee_id <- character(n_rows)
  salary <- numeric(n_rows)
  department <- character(n_rows)
  performance_score <- numeric(n_rows)
  kpi_score <- numeric(n_rows)

  row <- 0L
  # Outer loop: per company (seq_len() is empty for 0, unlike 1:0)
  for (c_id in seq_len(n_company)) {
    # Inner loop: per employee
    for (e_id in seq_len(n_employees)) {
      row <- row + 1L
      company_id[row] <- paste0("C", c_id)
      employee_id[row] <- paste0("E", c_id, "_", e_id)
      # The random draws stay in this order (salary, department, performance,
      # KPI) so that a given seed yields the same data as before.
      salary[row] <- round(runif(1, 3000, 15000), 2)
      department[row] <- sample(departments, 1)
      performance_score[row] <- round(runif(1, 50, 100), 1)
      kpi_score[row] <- round(runif(1, 60, 100), 1)
    }
  }

  data.frame(
    company_id = company_id,
    employee_id = employee_id,
    salary = salary,
    department = department,
    performance_score = performance_score,
    KPI_score = kpi_score
  )
}
# Generate 4 companies with 30 employees each.
company_data <- generate_company_data(n_company = 4, n_employees = 30)

# Per-company summary; employees with a KPI score strictly above 90 are
# counted as top performers.
company_summary <- summarise(
  group_by(company_data, company_id),
  Avg_Salary = round(mean(salary), 2),
  Avg_Performance = round(mean(performance_score), 2),
  Max_KPI = max(KPI_score),
  Top_Performers = sum(KPI_score > 90),
  .groups = "drop"
)
company_summary %>%
  knitr::kable(caption = "Company Summary with Top Performers (KPI > 90)")
Company Summary with Top Performers (KPI > 90)
| C1 |
8430.26 |
77.12 |
99.4 |
9 |
| C2 |
8710.82 |
70.03 |
98.9 |
9 |
| C3 |
8660.28 |
76.99 |
95.2 |
5 |
| C4 |
8860.94 |
75.40 |
99.7 |
6 |
# Left panel: average salary per company, labelled with the rounded value.
p1 <- ggplot(
  data = company_summary,
  mapping = aes(x = company_id, y = Avg_Salary, fill = company_id)
) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(
    mapping = aes(label = scales::comma(round(Avg_Salary))),
    fontface = "bold",
    size = 3.8,
    vjust = -0.5
  ) +
  labs(
    title = "Average Salary per Company",
    x = "Company",
    y = "Avg Salary ($)"
  ) +
  scale_fill_manual(values = c("#2980b9", "#1abc9c", "#e67e22", "#9b59b6")) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_blank(),
    plot.title = element_text(face = "bold")
  )

# Right panel: number of top performers (KPI > 90) per company.
p2 <- ggplot(
  data = company_summary,
  mapping = aes(x = company_id, y = Top_Performers, fill = company_id)
) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(
    mapping = aes(label = Top_Performers),
    fontface = "bold",
    size = 4.5,
    vjust = -0.5
  ) +
  labs(
    title = "Top Performers per Company (KPI > 90)",
    x = "Company",
    y = "Number of Employees"
  ) +
  scale_fill_manual(values = c("#27ae60", "#2ecc71", "#16a085", "#1abc9c")) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_blank(),
    plot.title = element_text(face = "bold")
  )

# Show both panels side by side.
grid.arrange(p1, p2, ncol = 2)

Task 5: Monte Carlo Simulation — Pi &
Probability
Task 5: The function
monte_carlo_pi(n_points) estimates \(\pi\) using random point sampling and
computes the probability of points falling inside a sub-square \([-0.5, 0.5]^2\). Points inside vs. outside
the unit circle are visualized.
# Estimate pi by Monte Carlo sampling in the square [-1, 1] x [-1, 1].
#
# The share of points inside the unit circle approximates pi / 4. The share
# of points inside the sub-square [-0.5, 0.5]^2 is returned as well.
#
# Arguments:
#   n_points  Number of random points to draw (at least 1).
#   seed      Value passed to set.seed() for reproducibility. The default
#             (99) reproduces the original output; NULL leaves the RNG state
#             untouched.
#
# Returns a list with:
#   pi_estimate     4 * (points inside the unit circle) / n_points
#   prob_subsquare  share of points with |x| <= 0.5 and |y| <= 0.5
#   inside          logical vector, TRUE where x^2 + y^2 <= 1
#   x, y            the sampled coordinates
monte_carlo_pi <- function(n_points, seed = 99) {
  # Zero points would make both ratios 0 / 0, so fail early with a clear check.
  stopifnot(is.numeric(n_points), length(n_points) == 1, n_points >= 1)
  if (!is.null(seed)) {
    set.seed(seed)
  }
  x <- runif(n_points, -1, 1)
  y <- runif(n_points, -1, 1)

  # Vectorised membership tests replace the element-by-element counting loop.
  inside <- (x^2 + y^2) <= 1
  in_subsquare <- abs(x) <= 0.5 & abs(y) <= 0.5

  list(
    pi_estimate = 4 * sum(inside) / n_points,
    prob_subsquare = sum(in_subsquare) / n_points,
    inside = inside,
    x = x, y = y
  )
}
# Run the simulation with 10,000 points and print the estimate next to the
# true value of pi, the absolute and relative errors, and the empirical vs.
# theoretical probability of landing in the sub-square. The `##` lines are
# the rendered console output of each statement.
mc <- monte_carlo_pi(10000)
cat(sprintf("Pi Estimate : %.6f\n", mc$pi_estimate))
## Pi Estimate : 3.146800
cat(sprintf("True Pi (actual) : %.6f\n", pi))
## True Pi (actual) : 3.141593
cat(sprintf("Absolute Error : %.6f\n", abs(mc$pi_estimate - pi)))
## Absolute Error : 0.005207
# Relative error is expressed as a percentage of the true value.
cat(sprintf("Relative Error : %.4f%%\n",
abs(mc$pi_estimate - pi) / pi * 100))
## Relative Error : 0.1658%
cat(sprintf("P(point in sub-sq.) : %.4f\n", mc$prob_subsquare))
## P(point in sub-sq.) : 0.2542
# The sub-square has side 1 inside a square of side 2, i.e. 1/4 of the area.
cat(sprintf("Theoretical P : 0.2500 (area 1/4 of full square)\n"))
## Theoretical P : 0.2500 (area 1/4 of full square)
# Scatter plot of the sampled points, coloured by whether they fall inside
# the unit circle, with the circle and the sub-square drawn on top.
plot_df <- data.frame(x = mc$x, y = mc$y, inside = mc$inside)

# Subsample for speed. The size is capped at the number of rows so that a run
# with fewer than 3000 points does not make sample() fail.
idx <- sample(nrow(plot_df), min(3000, nrow(plot_df)))

ggplot(plot_df[idx, ], aes(x = x, y = y, color = inside)) +
  geom_point(size = 0.7, alpha = 0.65) +
  # Unit circle boundary
  annotate(
    "path",
    x = cos(seq(0, 2 * pi, length.out = 400)),
    y = sin(seq(0, 2 * pi, length.out = 400)),
    color = "#2c3e50", size = 1.1
  ) +
  # Sub-square boundary
  annotate(
    "rect",
    xmin = -0.5, xmax = 0.5, ymin = -0.5, ymax = 0.5,
    fill = NA, color = "#e67e22", linetype = "dashed", size = 1.1
  ) +
  # Labels are named so that each one is tied to its value explicitly instead
  # of relying on the order in which the scale lists FALSE and TRUE.
  scale_color_manual(
    values = c("TRUE" = "#2980b9", "FALSE" = "#e74c3c"),
    labels = c("FALSE" = "Outside Circle", "TRUE" = "Inside Circle")
  ) +
  coord_fixed() +
  labs(
    title = bquote("Monte Carlo Estimation: " ~ pi ~ " \u2248 " ~
      .(round(mc$pi_estimate, 5))),
    subtitle = "Orange dashed square = sub-square for probability analysis",
    color = NULL, x = "x", y = "y"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", color = "#2c3e50", size = 14),
    plot.subtitle = element_text(color = "#7f8c8d"),
    legend.position = "top"
  )

Task 6: Advanced Data Transformation &
Feature Engineering
Task 6: Two normalization functions —
normalize_columns() (Min-Max) and z_score()
(Standardization) — apply loop-based transformations. New features
performance_category and salary_bracket are
engineered. Before/after distributions are compared via histograms.
# Min-Max Normalization: scales every numeric column of `df` to [0, 1].
#
# Arguments:
#   df  A data frame. Non-numeric columns are returned unchanged.
#
# Returns a data frame of the same shape. Missing values stay missing. A
# column whose values are all equal is mapped to 0 (instead of NaN from a
# 0 / 0 division), and a column with no non-missing values is left as is.
normalize_columns <- function(df) {
  df_norm <- df
  # vapply() always returns a logical vector, even for a zero-column data
  # frame, where sapply() would return a list and break the indexing.
  numeric_cols <- names(df)[vapply(df, is.numeric, logical(1))]
  for (col in numeric_cols) {
    values <- df[[col]]
    if (all(is.na(values))) {
      next # min()/max() of nothing would warn and return Inf / -Inf
    }
    mn <- min(values, na.rm = TRUE)
    mx <- max(values, na.rm = TRUE)
    span <- mx - mn
    if (span == 0) {
      # Constant column: every value equals the minimum, so map it to 0.
      df_norm[[col]] <- values - mn
    } else {
      df_norm[[col]] <- (values - mn) / span
    }
  }
  df_norm
}
# Z-Score Standardization: rescales every numeric column of `df` to
# mean = 0 and sd = 1.
#
# Arguments:
#   df  A data frame. Non-numeric columns are returned unchanged.
#
# Returns a data frame of the same shape. Missing values stay missing. A
# column with zero standard deviation is mapped to 0 (instead of NaN from a
# 0 / 0 division). When sd() itself is NA (fewer than two non-missing
# values) the column becomes NA, as in the plain formula.
z_score <- function(df) {
  df_z <- df
  # vapply() always returns a logical vector, even for a zero-column data
  # frame, where sapply() would return a list and break the indexing.
  numeric_cols <- names(df)[vapply(df, is.numeric, logical(1))]
  for (col in numeric_cols) {
    values <- df[[col]]
    centered <- values - mean(values, na.rm = TRUE)
    spread <- sd(values, na.rm = TRUE)
    if (!is.na(spread) && spread == 0) {
      # Constant column: the centered values are already all zero.
      df_z[[col]] <- centered
    } else {
      df_z[[col]] <- centered / spread
    }
  }
  df_z
}
# Keep the three numeric measures and derive the Min-Max and Z-score
# versions used in the comparisons below.
df_raw <- company_data[c("salary", "performance_score", "KPI_score")]
df_norm <- normalize_columns(df_raw)
df_z <- z_score(df_raw)

# Feature engineering: bin performance scores into five ordered bands
# (0-60, 60-70, 70-80, 80-90, 90-100; upper bound inclusive).
company_data[["performance_category"]] <- cut(
  x = company_data[["performance_score"]],
  breaks = c(0, 60, 70, 80, 90, 100),
  labels = c("Poor", "Average", "Good", "Very Good", "Excellent")
)

# Feature engineering: bin salaries into four brackets
# (0-5000, 5000-8000, 8000-11000, 11000-15000; upper bound inclusive).
company_data[["salary_bracket"]] <- cut(
  x = company_data[["salary"]],
  breaks = c(0, 5000, 8000, 11000, 15000),
  labels = c("Low", "Medium", "High", "Very High")
)
# Print summary statistics of the numeric columns before and after Min-Max
# normalization. The `##` lines are the rendered console output.
cat("=== Before Normalization ===\n")
## === Before Normalization ===
# Original note (translated): use apply to round the numbers on the numeric
# columns only.
# NOTE(review): no apply() or round() call is present here; the summary is
# printed as returned by summary().
print(summary(df_raw))
## salary performance_score KPI_score
## Min. : 3113 Min. :50.30 Min. :60.00
## 1st Qu.: 5606 1st Qu.:62.75 1st Qu.:70.40
## Median : 8498 Median :71.40 Median :81.35
## Mean : 8666 Mean :74.89 Mean :80.06
## 3rd Qu.:11719 3rd Qu.:88.70 3rd Qu.:89.38
## Max. :14993 Max. :99.70 Max. :99.70
cat("\n=== After Min-Max Normalization ===\n")
##
## === After Min-Max Normalization ===
# Same as above: avoid wrapping summary() in round() directly.
print(summary(df_norm))
## salary performance_score KPI_score
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2098 1st Qu.:0.2520 1st Qu.:0.2620
## Median :0.4533 Median :0.4271 Median :0.5378
## Mean :0.4674 Mean :0.4977 Mean :0.5052
## 3rd Qu.:0.7244 3rd Qu.:0.7773 3rd Qu.:0.7399
## Max. :1.0000 Max. :1.0000 Max. :1.0000
# Draw a 15-bin histogram of one column.
#
# Arguments:
#   data      Data frame (or list) holding the column.
#   col       Name of the column to plot; also used as the x-axis label.
#   title     Plot title.
#   fill_col  Fill colour of the bars.
#
# Returns the ggplot object.
make_hist <- function(data, col, title, fill_col) {
  # Copy the column into a fixed name so aes() does not depend on `col`.
  hist_df <- data.frame(v = data[[col]])

  bars <- geom_histogram(
    bins = 15,
    fill = fill_col,
    color = "white",
    alpha = 0.88
  )
  hist_theme <- theme_minimal(base_size = 11) +
    theme(
      plot.title = element_text(face = "bold", size = 10),
      panel.grid.minor = element_blank()
    )

  ggplot(hist_df, aes(x = v)) +
    bars +
    labs(title = title, x = col, y = "Frequency") +
    hist_theme
}
# Before/after comparison, one variable per row: raw values on the left,
# transformed values on the right.
grid.arrange(
  # Row 1: salary
  make_hist(data = df_raw, col = "salary",
            title = "Salary — Raw", fill_col = "#aed6f1"),
  make_hist(data = df_norm, col = "salary",
            title = "Salary — Min-Max Normalized", fill_col = "#2980b9"),
  # Row 2: KPI score
  make_hist(data = df_raw, col = "KPI_score",
            title = "KPI Score — Raw", fill_col = "#a9dfbf"),
  make_hist(data = df_norm, col = "KPI_score",
            title = "KPI Score — Normalized", fill_col = "#27ae60"),
  # Row 3: performance score (Z-score instead of Min-Max)
  make_hist(data = df_raw, col = "performance_score",
            title = "Performance — Raw", fill_col = "#fad7a0"),
  make_hist(data = df_z, col = "performance_score",
            title = "Performance — Z-Score", fill_col = "#e67e22"),
  ncol = 2
)

Task 7: Mini Project — Company KPI
Dashboard
Task 7: A dataset of 7 companies × 80
employees is generated. Employees are categorized into KPI tiers using a
loop. Visualizations include a grouped bar chart by department, a
scatter plot with per-company regression lines, and a faceted KPI tier
distribution chart.
# NOTE(review): generate_company_data() calls set.seed() itself, so the seed
# set on the next line does not influence the generated data. It is kept so
# the documented output stays unchanged; confirm which seed was intended.
set.seed(2024)
big_data <- generate_company_data(n_company = 7, n_employees = 80)

# Loop to assign KPI tier to each employee:
#   >= 90 Platinum, >= 80 Gold, >= 70 Silver, otherwise Bronze.
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow() would iterate over c(1, 0).
kpi_tier <- character(nrow(big_data))
for (i in seq_len(nrow(big_data))) {
  kpi <- big_data$KPI_score[i]
  if (kpi >= 90) {
    kpi_tier[i] <- "Platinum"
  } else if (kpi >= 80) {
    kpi_tier[i] <- "Gold"
  } else if (kpi >= 70) {
    kpi_tier[i] <- "Silver"
  } else {
    kpi_tier[i] <- "Bronze"
  }
}

# Store the tiers as a factor ordered from lowest to highest so plots and
# tables list them in that order.
big_data$KPI_tier <- factor(
  kpi_tier,
  levels = c("Bronze", "Silver", "Gold", "Platinum")
)
cat("Dataset dimensions:", nrow(big_data), "rows ×", ncol(big_data), "columns\n")
## Dataset dimensions: 560 rows × 7 columns
## company_id employee_id salary department performance_score KPI_score
## 1 C1 E1_1 6450.93 Marketing 94.2 97.6
## 2 C1 E1_2 3546.68 Finance 94.6 82.1
## 3 C1 E1_3 8479.38 IT 72.7 87.1
## 4 C1 E1_4 9871.60 Finance 95.0 69.8
## 5 C1 E1_5 3504.71 Marketing 97.7 95.6
## 6 C1 E1_6 11313.64 HR 99.7 86.2
## KPI_tier
## 1 Platinum
## 2 Gold
## 3 Gold
## 4 Bronze
## 5 Platinum
## 6 Gold
# Per-company KPI dashboard: headcount, mean salary and KPI, and the number
# and percentage of employees with a KPI score of at least 90.
kpi_summary <- big_data %>%
  group_by(company_id) %>%
  summarise(
    Employees = n(),
    Avg_Salary = round(mean(salary), 2),
    Avg_KPI = round(mean(KPI_score), 2),
    Top_Performers = sum(KPI_score >= 90),
    # Reuses the two counts computed above within the same summarise() call.
    Pct_Top = paste0(round(Top_Performers / Employees * 100, 1), "%"),
    .groups = "drop"
  )
kpi_summary %>%
  knitr::kable(caption = "KPI Dashboard Summary per Company")
KPI Dashboard Summary per Company
| C1 |
80 |
8376.95 |
82.03 |
22 |
27.5% |
| C2 |
80 |
9221.18 |
77.62 |
16 |
20% |
| C3 |
80 |
9025.40 |
78.07 |
13 |
16.2% |
| C4 |
80 |
8777.38 |
80.58 |
21 |
26.2% |
| C5 |
80 |
9147.43 |
81.04 |
25 |
31.2% |
| C6 |
80 |
9246.89 |
80.10 |
19 |
23.8% |
| C7 |
80 |
8800.90 |
82.36 |
27 |
33.8% |
# Mean salary for every company/department combination.
dept_summary <- summarise(
  group_by(big_data, company_id, department),
  avg_salary = round(mean(salary), 2),
  .groups = "drop"
)

# Plot 1: Grouped bar chart — avg salary by dept & company
p1 <- ggplot(
  data = dept_summary,
  mapping = aes(x = department, y = avg_salary, fill = company_id)
) +
  geom_col(width = 0.75, position = "dodge") +
  labs(
    title = "Average Salary by Department & Company",
    x = "Department",
    y = "Average Salary ($)",
    fill = "Company"
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    axis.text.x = element_text(angle = 25, hjust = 1),
    panel.grid.major.x = element_blank()
  )
# Plot 2: Scatter plot — salary vs KPI, with one linear fit per company
# NOTE(review): recent ggplot2 releases deprecate `size` for line geoms in
# favour of `linewidth` — confirm the installed version before switching.
p2 <- ggplot(
  data = big_data,
  mapping = aes(x = salary, y = KPI_score, color = company_id)
) +
  geom_point(size = 1.8, alpha = 0.4) +
  geom_smooth(
    mapping = aes(group = company_id),
    method = "lm",
    se = FALSE,
    size = 1
  ) +
  labs(
    title = "Salary vs KPI Score with Regression Lines per Company",
    x = "Salary ($)",
    y = "KPI Score",
    color = "Company"
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", size = 13)
  )
# Plot 3: Faceted KPI tier distribution
# One fixed colour per tier, keyed by tier name.
tier_pal <- setNames(
  c("#cd7f32", "#bdc3c7", "#f4d03f", "#85c1e9"),
  c("Bronze", "Silver", "Gold", "Platinum")
)
p3 <- ggplot(
  data = big_data,
  mapping = aes(x = KPI_tier, fill = KPI_tier)
) +
  geom_bar(width = 0.7) +
  facet_wrap(~company_id, nrow = 2) +
  labs(
    title = "KPI Tier Distribution per Company",
    x = "KPI Tier",
    y = "Number of Employees"
  ) +
  scale_fill_manual(values = tier_pal) +
  theme_minimal(base_size = 11) +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 30, hjust = 1),
    legend.position = "none"
  )

# Stack the three dashboard plots vertically.
grid.arrange(p1, p2, p3, ncol = 1)

Task 8 (Bonus): Automated Report
Generation
Bonus: Using
functions + loops, an automated summary report is
generated for each company. Each report includes key statistics and a
department headcount table. Data is also exported to CSV files.
# Emit one Markdown/HTML report card per company: headline statistics
# followed by a department headcount table.
for (co in unique(big_data$company_id)) {
  cat("\n### Company:", co, "\n\n")
  # Rows of the current company. Named `company_rows` rather than `sub` so
  # that base::sub() is not shadowed.
  company_rows <- big_data[big_data$company_id == co, ]
  cat('<div class="report-card">\n\n')
  cat(sprintf("- **Total Employees:** %d \n", nrow(company_rows)))
  # nsmall = 2 keeps two decimals; without it format() drops a trailing zero
  # and prints e.g. "9,025.4" instead of "9,025.40".
  cat(sprintf("- **Average Salary:** $%s \n",
              format(round(mean(company_rows$salary), 2),
                     big.mark = ",", nsmall = 2)))
  cat(sprintf("- **Average KPI Score:** %.2f \n",
              mean(company_rows$KPI_score)))
  cat(sprintf("- **Average Performance:** %.2f \n",
              mean(company_rows$performance_score)))
  cat(sprintf("- **Top Performers (KPI >= 90):** %d employees \n\n",
              sum(company_rows$KPI_score >= 90)))
  # Headcount per department for this company.
  dept_tbl <- as.data.frame(table(company_rows$department))
  colnames(dept_tbl) <- c("Department", "Headcount")
  # print() is required: inside a loop the kable is not auto-printed.
  print(knitr::kable(dept_tbl, format = "html"))
  cat('\n</div>\n\n')
}
Company: C1
- Total Employees: 80
- Average Salary: $8,376.95
- Average KPI Score: 82.03
- Average Performance: 74.63
- Top Performers (KPI >= 90): 22 employees
|
Department
|
Headcount
|
|
Finance
|
15
|
|
HR
|
14
|
|
IT
|
16
|
|
Marketing
|
18
|
|
Operations
|
17
|
Company: C2
- Total Employees: 80
- Average Salary: $9,221.18
- Average KPI Score: 77.62
- Average Performance: 75.54
- Top Performers (KPI >= 90): 16 employees
|
Department
|
Headcount
|
|
Finance
|
11
|
|
HR
|
10
|
|
IT
|
23
|
|
Marketing
|
23
|
|
Operations
|
13
|
Company: C3
- Total Employees: 80
- Average Salary: $9,025.4
- Average KPI Score: 78.07
- Average Performance: 73.22
- Top Performers (KPI >= 90): 13 employees
|
Department
|
Headcount
|
|
Finance
|
15
|
|
HR
|
14
|
|
IT
|
17
|
|
Marketing
|
15
|
|
Operations
|
19
|
Company: C4
- Total Employees: 80
- Average Salary: $8,777.38
- Average KPI Score: 80.58
- Average Performance: 74.81
- Top Performers (KPI >= 90): 21 employees
|
Department
|
Headcount
|
|
Finance
|
13
|
|
HR
|
21
|
|
IT
|
13
|
|
Marketing
|
12
|
|
Operations
|
21
|
Company: C5
- Total Employees: 80
- Average Salary: $9,147.43
- Average KPI Score: 81.05
- Average Performance: 77.31
- Top Performers (KPI >= 90): 25 employees
|
Department
|
Headcount
|
|
Finance
|
13
|
|
HR
|
19
|
|
IT
|
16
|
|
Marketing
|
19
|
|
Operations
|
13
|
Company: C6
- Total Employees: 80
- Average Salary: $9,246.89
- Average KPI Score: 80.10
- Average Performance: 73.11
- Top Performers (KPI >= 90): 19 employees
|
Department
|
Headcount
|
|
Finance
|
15
|
|
HR
|
18
|
|
IT
|
13
|
|
Marketing
|
15
|
|
Operations
|
19
|
Company: C7
- Total Employees: 80
- Average Salary: $8,800.9
- Average KPI Score: 82.36
- Average Performance: 72.27
- Top Performers (KPI >= 90): 27 employees
|
Department
|
Headcount
|
|
Finance
|
18
|
|
HR
|
17
|
|
IT
|
14
|
|
Marketing
|
19
|
|
Operations
|
12
|
# Optional: write the employee-level data and the per-company KPI summary to
# CSV files in the working directory (row names omitted).
write.csv(
  x = big_data,
  file = "company_data_full.csv",
  row.names = FALSE
)
write.csv(
  x = kpi_summary,
  file = "kpi_summary_per_company.csv",
  row.names = FALSE
)
cat("Data successfully exported to CSV.\n")