Task 1: Dynamic Multi-Formula Function

Task 1 The function compute_formula(x, formula) computes values for four formula types — linear, quadratic, cubic, and exponential — using nested loops and input validation. All formulas are plotted on the same graph for $x = 1:20$.

# ── Main function with input validation ─────────────────────────────────────
compute_formula <- function(x, formula) {
  valid_formulas <- c("linear", "quadratic", "cubic", "exponential")

  # Validate formula input
  if (!formula %in% valid_formulas) {
    stop(paste("Invalid formula! Choose one of:",
               paste(valid_formulas, collapse = ", ")))
  }

  # Compute value based on formula type
  result <- switch(formula,
    "linear"      = 2 * x + 1,
    "quadratic"   = x^2 + 3 * x + 2,
    "cubic"       = x^3 - 2 * x^2 + x - 1,
    "exponential" = exp(0.3 * x)
  )
  return(result)
}

x_vals   <- 1:20
formulas <- c("linear", "quadratic", "cubic", "exponential")
results  <- list()

# Nested loop: compute all formulas across all x values
for (f in formulas) {
  y_vals <- numeric(length(x_vals))
  for (i in seq_along(x_vals)) {
    y_vals[i] <- compute_formula(x_vals[i], f)
  }
  results[[f]] <- y_vals
}

# Build result data frame and preview
df_result <- data.frame(
  x           = x_vals,
  linear      = results[["linear"]],
  quadratic   = results[["quadratic"]],
  cubic       = results[["cubic"]],
  exponential = results[["exponential"]]
)

knitr::kable(head(df_result, 6), digits = 2,
             caption = "Computation Results Preview (first 6 rows)")

Computation Results Preview (first 6 rows)
x	linear	quadratic	cubic	exponential
1	3	6	-1	1.35
2	5	12	1	1.82
3	7	20	11	2.46
4	9	30	35	3.32
5	11	42	79	4.48
6	13	56	149	6.05

# Reshape to long format for ggplot
df_long <- pivot_longer(df_result, -x, names_to = "Formula", values_to = "y")

ggplot(df_long, aes(x = x, y = y, color = Formula)) +
  geom_line(size = 1.4) +
  geom_point(size = 2.2, alpha = 0.85) +
  scale_color_manual(values = c(
    "linear"      = "#2980b9",
    "quadratic"   = "#e74c3c",
    "cubic"       = "#27ae60",
    "exponential" = "#8e44ad"
  )) +
  labs(
    title    = "Comparison of Four Mathematical Formulas",
    subtitle = "Evaluated for x = 1 to 20",
    x = "x", y = "f(x)", color = "Formula"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold", color = "#2c3e50", size = 15),
    plot.subtitle   = element_text(color = "#7f8c8d"),
    legend.position = "top",
    panel.grid.minor = element_blank()
  )

Task 2: Nested Simulation — Multi-Sales & Discounts

Task 2 The function simulate_sales(n_salesperson, days) simulates daily sales data using nested loops. A nested get_discount() function applies conditional discounts based on the sales amount. Outputs include cumulative sales per salesperson and a summary statistics table.

simulate_sales <- function(n_salesperson, days) {
  set.seed(42)

  # Nested function: determine discount based on sales amount
  get_discount <- function(amount) {
    if      (amount >= 5000) return(0.20)   # 20% discount
    else if (amount >= 3000) return(0.15)   # 15% discount
    else if (amount >= 1000) return(0.10)   # 10% discount
    else                     return(0.05)   # 5%  discount
  }

  all_sales <- data.frame()

  # Outer loop: per salesperson
  for (sp in 1:n_salesperson) {
    cumulative_sales <- 0

    # Inner loop: per day
    for (d in 1:days) {
      amount           <- round(runif(1, 500, 6000), 2)
      discount         <- get_discount(amount)
      cumulative_sales <- cumulative_sales + amount

      all_sales <- rbind(all_sales, data.frame(
        sales_id      = sp,
        day           = d,
        sales_amount  = amount,
        discount_rate = discount,
        cumulative    = round(cumulative_sales, 2)
      ))
    }
  }
  return(all_sales)
}

sales_data <- simulate_sales(n_salesperson = 5, days = 10)

knitr::kable(head(sales_data, 10), digits = 2,
             caption = "Sales Data Sample (first 10 rows)")

Sales Data Sample (first 10 rows)
sales_id	day	sales_amount	discount_rate	cumulative
1	1	5531.43	0.20	5531.43
1	2	5653.91	0.20	11185.34
1	3	2073.77	0.10	13259.11
1	4	5067.46	0.20	18326.57
1	5	4029.60	0.15	22356.17
1	6	3355.03	0.15	25711.20
1	7	4551.24	0.15	30262.44
1	8	1240.67	0.10	31503.11
1	9	4113.46	0.15	35616.57
1	10	4377.86	0.15	39994.43

# Summary statistics per salesperson
summary_sales <- sales_data %>%
  group_by(sales_id) %>%
  summarise(
    Total_Sales   = sum(sales_amount),
    Avg_Sales     = round(mean(sales_amount), 2),
    Avg_Discount  = round(mean(discount_rate), 3),
    Max_Cumulative = max(cumulative),
    .groups = "drop"
  )

knitr::kable(summary_sales, digits = 2,
             caption = "Summary Statistics per Salesperson")

Summary Statistics per Salesperson
sales_id	Total_Sales	Avg_Sales	Avg_Discount	Max_Cumulative
1	39994.43	3999.44	0.16	39994.43
2	37451.69	3745.17	0.16	37451.69
3	38846.26	3884.63	0.15	38846.26
4	33556.92	3355.69	0.13	33556.92
5	39832.42	3983.24	0.14	39832.42

ggplot(sales_data, aes(x = day, y = cumulative,
                       color = factor(sales_id), group = sales_id)) +
  geom_line(size = 1.4) +
  geom_point(size = 2.8) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title    = "Cumulative Sales per Salesperson",
    subtitle = "Over 10 Working Days",
    x = "Day", y = "Cumulative Sales ($)", color = "Salesperson ID"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold", color = "#2c3e50", size = 15),
    legend.position = "top",
    panel.grid.minor = element_blank()
  )

Task 3: Multi-Level Performance Categorization

Task 3 The function categorize_performance(sales_amount) iterates through a vector and assigns one of 5 performance tiers: Excellent, Very Good, Good, Average, or Poor. Distribution is shown via bar chart and pie chart.

categorize_performance <- function(sales_amount) {
  categories <- character(length(sales_amount))

  # Loop through each sales value and assign category
  for (i in seq_along(sales_amount)) {
    val <- sales_amount[i]
    if      (val >= 5000) categories[i] <- "Excellent"
    else if (val >= 4000) categories[i] <- "Very Good"
    else if (val >= 3000) categories[i] <- "Good"
    else if (val >= 1500) categories[i] <- "Average"
    else                  categories[i] <- "Poor"
  }
  return(categories)
}

sales_data$performance <- categorize_performance(sales_data$sales_amount)

# Build distribution table with percentages
perf_table           <- as.data.frame(table(sales_data$performance))
colnames(perf_table) <- c("Category", "Count")
perf_table$Percentage <- round(perf_table$Count / nrow(sales_data) * 100, 1)
perf_table$Category   <- factor(perf_table$Category,
  levels = c("Poor", "Average", "Good", "Very Good", "Excellent"))
perf_table <- perf_table[order(perf_table$Category), ]

knitr::kable(perf_table, caption = "Performance Category Distribution")

Performance Category Distribution
	Category	Count	Percentage
4	Poor	7	14
1	Average	9	18
3	Good	8	16
5	Very Good	9	18
2	Excellent	17	34

pal <- c("Poor"      = "#e74c3c",
         "Average"   = "#e67e22",
         "Good"      = "#f4d03f",
         "Very Good" = "#2980b9",
         "Excellent" = "#27ae60")

p_bar <- ggplot(perf_table, aes(x = Category, y = Percentage, fill = Category)) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = paste0(Percentage, "%")), vjust = -0.5, size = 4, fontface = "bold") +
  scale_fill_manual(values = pal) +
  labs(title = "Performance Distribution — Bar Chart",
       x = NULL, y = "Percentage (%)") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"),
        panel.grid.major.x = element_blank())

p_pie <- ggplot(perf_table, aes(x = "", y = Percentage, fill = Category)) +
  geom_col(width = 1, color = "white", size = 0.8) +
  coord_polar("y") +
  scale_fill_manual(values = pal) +
  labs(title = "Performance Distribution — Pie Chart", fill = "Category") +
  theme_void() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12))

grid.arrange(p_bar, p_pie, ncol = 2)

Task 4: Multi-Company Dataset Simulation

Task 4 The function generate_company_data(n_company, n_employees) uses nested loops to build a multi-company HR dataset. Conditional logic flags top performers (KPI > 90). Output includes a summary table and comparative plots.

generate_company_data <- function(n_company, n_employees) {
  set.seed(123)
  departments <- c("HR", "Finance", "Marketing", "IT", "Operations")
  all_data    <- data.frame()

  # Outer loop: per company
  for (c_id in 1:n_company) {
    # Inner loop: per employee
    for (e_id in 1:n_employees) {
      all_data <- rbind(all_data, data.frame(
        company_id        = paste0("C", c_id),
        employee_id       = paste0("E", c_id, "_", e_id),
        salary            = round(runif(1, 3000, 15000), 2),
        department        = sample(departments, 1),
        performance_score = round(runif(1, 50, 100), 1),
        KPI_score         = round(runif(1, 60, 100), 1)
      ))
    }
  }
  return(all_data)
}

company_data <- generate_company_data(n_company = 4, n_employees = 30)

# Summary per company with top performer identification
company_summary <- company_data %>%
  group_by(company_id) %>%
  summarise(
    Avg_Salary      = round(mean(salary), 2),
    Avg_Performance = round(mean(performance_score), 2),
    Max_KPI         = max(KPI_score),
    Top_Performers  = sum(KPI_score > 90),   # Conditional: KPI > 90
    .groups = "drop"
  )

knitr::kable(company_summary, caption = "Company Summary with Top Performers (KPI > 90)")

Company Summary with Top Performers (KPI > 90)
company_id	Avg_Salary	Avg_Performance	Max_KPI	Top_Performers
C1	8430.26	77.12	99.4	9
C2	8710.82	70.03	98.9	9
C3	8660.28	76.99	95.2	5
C4	8860.94	75.40	99.7	6

p1 <- ggplot(company_summary,
             aes(x = company_id, y = Avg_Salary, fill = company_id)) +
  geom_col(show.legend = FALSE, width = 0.6) +
  geom_text(aes(label = scales::comma(round(Avg_Salary))),
            vjust = -0.5, size = 3.8, fontface = "bold") +
  scale_fill_manual(values = c("#2980b9","#1abc9c","#e67e22","#9b59b6")) +
  labs(title = "Average Salary per Company",
       x = "Company", y = "Avg Salary ($)") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"),
        panel.grid.major.x = element_blank())

p2 <- ggplot(company_summary,
             aes(x = company_id, y = Top_Performers, fill = company_id)) +
  geom_col(show.legend = FALSE, width = 0.6) +
  geom_text(aes(label = Top_Performers),
            vjust = -0.5, size = 4.5, fontface = "bold") +
  scale_fill_manual(values = c("#27ae60","#2ecc71","#16a085","#1abc9c")) +
  labs(title = "Top Performers per Company (KPI > 90)",
       x = "Company", y = "Number of Employees") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"),
        panel.grid.major.x = element_blank())

grid.arrange(p1, p2, ncol = 2)

Task 5: Monte Carlo Simulation — Pi & Probability

Task 5 The function monte_carlo_pi(n_points) estimates $\pi$ using random point sampling and computes the probability of points falling inside a sub-square $[-0.5, 0.5]^2$. Points inside vs. outside the unit circle are visualized.

monte_carlo_pi <- function(n_points) {
  set.seed(99)
  x <- runif(n_points, -1, 1)
  y <- runif(n_points, -1, 1)

  inside_circle    <- 0
  inside_subsquare <- 0

  # Loop to count points in circle and sub-square
  for (i in 1:n_points) {
    dist <- x[i]^2 + y[i]^2
    if (dist <= 1) inside_circle <- inside_circle + 1

    if (abs(x[i]) <= 0.5 && abs(y[i]) <= 0.5)
      inside_subsquare <- inside_subsquare + 1
  }

  list(
    pi_estimate    = 4 * inside_circle / n_points,
    prob_subsquare = inside_subsquare / n_points,
    inside         = (x^2 + y^2) <= 1,
    x = x, y = y
  )
}

mc <- monte_carlo_pi(10000)

cat(sprintf("Pi Estimate          : %.6f\n", mc$pi_estimate))

## Pi Estimate          : 3.146800

cat(sprintf("True Pi (actual)     : %.6f\n", pi))

## True Pi (actual)     : 3.141593

cat(sprintf("Absolute Error       : %.6f\n", abs(mc$pi_estimate - pi)))

## Absolute Error       : 0.005207

cat(sprintf("Relative Error       : %.4f%%\n",
            abs(mc$pi_estimate - pi) / pi * 100))

## Relative Error       : 0.1658%

cat(sprintf("P(point in sub-sq.)  : %.4f\n",  mc$prob_subsquare))

## P(point in sub-sq.)  : 0.2542

cat(sprintf("Theoretical P        : 0.2500 (area 1/4 of full square)\n"))

## Theoretical P        : 0.2500 (area 1/4 of full square)

plot_df <- data.frame(x = mc$x, y = mc$y, inside = mc$inside)
idx     <- sample(nrow(plot_df), 3000)   # subsample for speed

ggplot(plot_df[idx, ], aes(x = x, y = y, color = inside)) +
  geom_point(size = 0.7, alpha = 0.65) +
  # Unit circle boundary
  annotate("path",
           x = cos(seq(0, 2 * pi, length.out = 400)),
           y = sin(seq(0, 2 * pi, length.out = 400)),
           color = "#2c3e50", size = 1.1) +
  # Sub-square boundary
  annotate("rect", xmin = -0.5, xmax = 0.5, ymin = -0.5, ymax = 0.5,
           fill = NA, color = "#e67e22", linetype = "dashed", size = 1.1) +
  scale_color_manual(
    values = c("TRUE" = "#2980b9", "FALSE" = "#e74c3c"),
    labels = c("Outside Circle", "Inside Circle")
  ) +
  coord_fixed() +
  labs(
    title    = bquote("Monte Carlo Estimation: " ~ pi ~ " \u2248 " ~
                      .(round(mc$pi_estimate, 5))),
    subtitle = "Orange dashed square = sub-square for probability analysis",
    color = NULL, x = "x", y = "y"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold", color = "#2c3e50", size = 14),
    plot.subtitle   = element_text(color = "#7f8c8d"),
    legend.position = "top"
  )

Task 6: Advanced Data Transformation & Feature Engineering

Task 6 Two normalization functions — normalize_columns() (Min-Max) and z_score() (Standardization) — apply loop-based transformations. New features performance_category and salary_bracket are engineered. Before/after distributions are compared via histograms.

# Min-Max Normalization: scales values to [0, 1]
normalize_columns <- function(df) {
  df_norm <- df
  for (col in names(df)[sapply(df, is.numeric)]) {
    mn <- min(df[[col]], na.rm = TRUE)
    mx <- max(df[[col]], na.rm = TRUE)
    df_norm[[col]] <- (df[[col]] - mn) / (mx - mn)
  }
  return(df_norm)
}

# Z-Score Standardization: mean = 0, sd = 1
z_score <- function(df) {
  df_z <- df
  for (col in names(df)[sapply(df, is.numeric)]) {
    df_z[[col]] <- (df[[col]] - mean(df[[col]], na.rm = TRUE)) /
                    sd(df[[col]], na.rm = TRUE)
  }
  return(df_z)
}

df_raw  <- company_data[, c("salary", "performance_score", "KPI_score")]
df_norm <- normalize_columns(df_raw)
df_z    <- z_score(df_raw)

# Feature engineering: create categorical features
company_data$performance_category <- cut(
  company_data$performance_score,
  breaks = c(0, 60, 70, 80, 90, 100),
  labels = c("Poor", "Average", "Good", "Very Good", "Excellent")
)

company_data$salary_bracket <- cut(
  company_data$salary,
  breaks = c(0, 5000, 8000, 11000, 15000),
  labels = c("Low", "Medium", "High", "Very High")
)

cat("=== Before Normalization ===\n")

## === Before Normalization ===

# Gunakan apply untuk merundingkan angka pada kolom numerik saja
print(summary(df_raw))

##      salary      performance_score   KPI_score    
##  Min.   : 3113   Min.   :50.30     Min.   :60.00  
##  1st Qu.: 5606   1st Qu.:62.75     1st Qu.:70.40  
##  Median : 8498   Median :71.40     Median :81.35  
##  Mean   : 8666   Mean   :74.89     Mean   :80.06  
##  3rd Qu.:11719   3rd Qu.:88.70     3rd Qu.:89.38  
##  Max.   :14993   Max.   :99.70     Max.   :99.70

cat("\n=== After Min-Max Normalization ===\n")

## 
## === After Min-Max Normalization ===

# Sama seperti di atas, hindari membungkus summary dengan round secara langsung
print(summary(df_norm))

##      salary       performance_score   KPI_score     
##  Min.   :0.0000   Min.   :0.0000    Min.   :0.0000  
##  1st Qu.:0.2098   1st Qu.:0.2520    1st Qu.:0.2620  
##  Median :0.4533   Median :0.4271    Median :0.5378  
##  Mean   :0.4674   Mean   :0.4977    Mean   :0.5052  
##  3rd Qu.:0.7244   3rd Qu.:0.7773    3rd Qu.:0.7399  
##  Max.   :1.0000   Max.   :1.0000    Max.   :1.0000

make_hist <- function(data, col, title, fill_col) {
  ggplot(data.frame(v = data[[col]]), aes(x = v)) +
    geom_histogram(bins = 15, fill = fill_col, color = "white", alpha = 0.88) +
    labs(title = title, x = col, y = "Frequency") +
    theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold", size = 10),
          panel.grid.minor = element_blank())
}

grid.arrange(
  make_hist(df_raw,  "salary",            "Salary — Raw",                "#aed6f1"),
  make_hist(df_norm, "salary",            "Salary — Min-Max Normalized", "#2980b9"),
  make_hist(df_raw,  "KPI_score",         "KPI Score — Raw",             "#a9dfbf"),
  make_hist(df_norm, "KPI_score",         "KPI Score — Normalized",      "#27ae60"),
  make_hist(df_raw,  "performance_score", "Performance — Raw",           "#fad7a0"),
  make_hist(df_z,    "performance_score", "Performance — Z-Score",       "#e67e22"),
  ncol = 2
)

Task 7: Mini Project — Company KPI Dashboard

Task 7 A dataset of 7 companies × 80 employees is generated. Employees are categorized into KPI tiers using a loop. Visualizations include a grouped bar chart by department, a scatter plot with per-company regression lines, and a faceted KPI tier distribution chart.

set.seed(2024)
big_data <- generate_company_data(n_company = 7, n_employees = 80)

# Loop to assign KPI tier to each employee
kpi_tier <- character(nrow(big_data))
for (i in 1:nrow(big_data)) {
  kpi <- big_data$KPI_score[i]
  if      (kpi >= 90) kpi_tier[i] <- "Platinum"
  else if (kpi >= 80) kpi_tier[i] <- "Gold"
  else if (kpi >= 70) kpi_tier[i] <- "Silver"
  else                kpi_tier[i] <- "Bronze"
}
big_data$KPI_tier <- factor(kpi_tier,
  levels = c("Bronze", "Silver", "Gold", "Platinum"))

cat("Dataset dimensions:", nrow(big_data), "rows ×", ncol(big_data), "columns\n")

## Dataset dimensions: 560 rows × 7 columns

head(big_data, 6)

##   company_id employee_id   salary department performance_score KPI_score
## 1         C1        E1_1  6450.93  Marketing              94.2      97.6
## 2         C1        E1_2  3546.68    Finance              94.6      82.1
## 3         C1        E1_3  8479.38         IT              72.7      87.1
## 4         C1        E1_4  9871.60    Finance              95.0      69.8
## 5         C1        E1_5  3504.71  Marketing              97.7      95.6
## 6         C1        E1_6 11313.64         HR              99.7      86.2
##   KPI_tier
## 1 Platinum
## 2     Gold
## 3     Gold
## 4   Bronze
## 5 Platinum
## 6     Gold

kpi_summary <- big_data %>%
  group_by(company_id) %>%
  summarise(
    Employees      = n(),
    Avg_Salary     = round(mean(salary), 2),
    Avg_KPI        = round(mean(KPI_score), 2),
    Top_Performers = sum(KPI_score >= 90),
    Pct_Top        = paste0(round(sum(KPI_score >= 90) / n() * 100, 1), "%"),
    .groups = "drop"
  )

knitr::kable(kpi_summary, caption = "KPI Dashboard Summary per Company")

KPI Dashboard Summary per Company
company_id	Employees	Avg_Salary	Avg_KPI	Top_Performers	Pct_Top
C1	80	8376.95	82.03	22	27.5%
C2	80	9221.18	77.62	16	20%
C3	80	9025.40	78.07	13	16.2%
C4	80	8777.38	80.58	21	26.2%
C5	80	9147.43	81.04	25	31.2%
C6	80	9246.89	80.10	19	23.8%
C7	80	8800.90	82.36	27	33.8%

dept_summary <- big_data %>%
  group_by(company_id, department) %>%
  summarise(avg_salary = round(mean(salary), 2), .groups = "drop")

# Plot 1: Grouped bar chart — avg salary by dept & company
p1 <- ggplot(dept_summary, aes(x = department, y = avg_salary, fill = company_id)) +
  geom_col(position = "dodge", width = 0.75) +
  scale_fill_brewer(palette = "Set2") +
  labs(title    = "Average Salary by Department & Company",
       x = "Department", y = "Average Salary ($)", fill = "Company") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x  = element_text(angle = 25, hjust = 1),
        plot.title    = element_text(face = "bold", size = 13),
        panel.grid.major.x = element_blank())

# Plot 2: Scatter plot — salary vs KPI with regression lines
p2 <- ggplot(big_data, aes(x = salary, y = KPI_score, color = company_id)) +
  geom_point(alpha = 0.4, size = 1.8) +
  geom_smooth(method = "lm", se = FALSE, size = 1, aes(group = company_id)) +
  scale_color_brewer(palette = "Set1") +
  labs(title    = "Salary vs KPI Score with Regression Lines per Company",
       x = "Salary ($)", y = "KPI Score", color = "Company") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 13),
        panel.grid.minor = element_blank())

# Plot 3: Faceted KPI tier distribution
tier_pal <- c("Bronze"   = "#cd7f32",
              "Silver"   = "#bdc3c7",
              "Gold"     = "#f4d03f",
              "Platinum" = "#85c1e9")

p3 <- ggplot(big_data, aes(x = KPI_tier, fill = KPI_tier)) +
  geom_bar(width = 0.7) +
  facet_wrap(~company_id, nrow = 2) +
  scale_fill_manual(values = tier_pal) +
  labs(title = "KPI Tier Distribution per Company",
       x = "KPI Tier", y = "Number of Employees") +
  theme_minimal(base_size = 11) +
  theme(legend.position  = "none",
        axis.text.x      = element_text(angle = 30, hjust = 1),
        plot.title       = element_text(face = "bold", size = 13),
        strip.text       = element_text(face = "bold"))

grid.arrange(p1, p2, p3, ncol = 1)

Task 8 (Bonus): Automated Report Generation

Bonus Using functions + loops, an automated summary report is generated for each company. Each report includes key statistics and a department headcount table. Data is also exported to CSV files.

for (co in unique(big_data$company_id)) {
  cat("\n### Company:", co, "\n\n")
  sub <- big_data[big_data$company_id == co, ]

  cat('<div class="report-card">\n\n')
  cat(sprintf("- **Total Employees:** %d  \n", nrow(sub)))
  cat(sprintf("- **Average Salary:** $%s  \n",
              format(round(mean(sub$salary), 2), big.mark = ",")))
  cat(sprintf("- **Average KPI Score:** %.2f  \n", mean(sub$KPI_score)))
  cat(sprintf("- **Average Performance:** %.2f  \n", mean(sub$performance_score)))
  cat(sprintf("- **Top Performers (KPI >= 90):** %d employees  \n\n",
              sum(sub$KPI_score >= 90)))

  dept_tbl           <- as.data.frame(table(sub$department))
  colnames(dept_tbl) <- c("Department", "Headcount")
  print(knitr::kable(dept_tbl, format = "html"))
  cat('\n</div>\n\n')
}

Company: C1

Total Employees: 80
Average Salary: $8,376.95
Average KPI Score: 82.03
Average Performance: 74.63
Top Performers (KPI >= 90): 22 employees

Department	Headcount
Finance	15
HR	14
IT	16
Marketing	18
Operations	17

Company: C2

Total Employees: 80
Average Salary: $9,221.18
Average KPI Score: 77.62
Average Performance: 75.54
Top Performers (KPI >= 90): 16 employees

Department	Headcount
Finance	11
HR	10
IT	23
Marketing	23
Operations	13

Company: C3

Total Employees: 80
Average Salary: $9,025.4
Average KPI Score: 78.07
Average Performance: 73.22
Top Performers (KPI >= 90): 13 employees

Department	Headcount
Finance	15
HR	14
IT	17
Marketing	15
Operations	19

Company: C4

Total Employees: 80
Average Salary: $8,777.38
Average KPI Score: 80.58
Average Performance: 74.81
Top Performers (KPI >= 90): 21 employees

Department	Headcount
Finance	13
HR	21
IT	13
Marketing	12
Operations	21

Company: C5

Total Employees: 80
Average Salary: $9,147.43
Average KPI Score: 81.05
Average Performance: 77.31
Top Performers (KPI >= 90): 25 employees

Department	Headcount
Finance	13
HR	19
IT	16
Marketing	19
Operations	13

Company: C6

Total Employees: 80
Average Salary: $9,246.89
Average KPI Score: 80.10
Average Performance: 73.11
Top Performers (KPI >= 90): 19 employees

Department	Headcount
Finance	15
HR	18
IT	13
Marketing	15
Operations	19

Company: C7

Total Employees: 80
Average Salary: $8,800.9
Average KPI Score: 82.36
Average Performance: 72.27
Top Performers (KPI >= 90): 27 employees

Department	Headcount
Finance	18
HR	17
IT	14
Marketing	19
Operations	12

# Optional: export datasets to CSV
write.csv(big_data,    "company_data_full.csv",       row.names = FALSE)
write.csv(kpi_summary, "kpi_summary_per_company.csv", row.names = FALSE)
cat("Data successfully exported to CSV.\n")

Conclusion

Summary of Concepts per Task
Task	Title	Key Concepts
Task 1	Dynamic Multi-Formula Function	switch(), nested loop, input validation
Task 2	Nested Simulation: Multi-Sales & Discounts	Nested function, conditional discount, loop
Task 3	Multi-Level Performance Categorization	Vector loop, nested if-else, visualization
Task 4	Multi-Company Dataset Simulation	Nested loop, conditional logic (KPI > 90)
Task 5	Monte Carlo Simulation: Pi & Probability	Monte Carlo loop, pi estimation, probability
Task 6	Advanced Data Transformation & Feature Engineering	Loop-based normalization, feature engineering
Task 7	Mini Project: Company KPI Dashboard	KPI tier loop, grouped charts, scatter + regression
Task 8 (Bonus)	Automated Report Generation	Automated loop per company, CSV export

Functions, Loops & Conditionals in R

Programming Practicum Week 5

Dhea Putri Khasanah

April 05, 2026