PRACTICUM

Assignment Week 5

1 Dynamic Multi-Formula Function

par(mar = c(4, 4, 2, 1))  # tighten the plot margins so the figure is not padded too far out

# Evaluate one of four predefined formulas on `x`.
#
# Args:
#   x:            numeric vector of input values.
#   formula_type: one of "linear", "quadratic", "cubic" or "exponential".
#
# Returns:
#   The selected formula applied element-wise to `x`, or NULL when
#   `formula_type` is none of the names above.
compute_formula <- function(x, formula_type) {
  if (formula_type == "linear") {
    2 * x + 3
  } else if (formula_type == "quadratic") {
    x^2 + 2 * x + 1
  } else if (formula_type == "cubic") {
    x^3 - x^2 + x
  } else if (formula_type == "exponential") {
    exp(x / 5)
  } else {
    # Unrecognised names fall through to NULL rather than raising an error.
    NULL
  }
}

# Input grid shared by all four formulas: the integers 1 to 20.
x <- seq_len(20)

# Evaluate every formula on the same grid.
linear <- compute_formula(x, formula_type = "linear")
quadratic <- compute_formula(x, formula_type = "quadratic")
cubic <- compute_formula(x, formula_type = "cubic")
exponential <- compute_formula(x, formula_type = "exponential")

# Colours and legend labels, listed in plotting order.
series_colors <- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728")
series_labels <- c("Linear", "Quadratic", "Cubic", "Exponential")

# Draw the first series with a y-axis wide enough to hold all four.
plot(
  x, linear,
  type = "o", col = series_colors[1], lwd = 2,
  ylim = range(linear, quadratic, cubic, exponential),
  main = "Comparison of Mathematical Functions",
  xlab = "X values", ylab = "Y values",
  cex.main = 1, cex.lab = 0.9
)

# Overlay the remaining series on the existing axes.
lines(x, quadratic, type = "o", col = series_colors[2], lwd = 2)
lines(x, cubic, type = "o", col = series_colors[3], lwd = 2)
lines(x, exponential, type = "o", col = series_colors[4], lwd = 2)

legend(
  "topleft",
  legend = series_labels,
  col = series_colors,
  lty = 1, pch = 1, cex = 0.8, bty = "n"
)

1.1 Explanation

This function is used to compute different types of mathematical functions such as linear, quadratic, cubic, and exponential. The function uses conditional statements (if-else) to determine which formula to apply. The input values range from 1 to 20, and the results are visualized in a single plot to compare the behavior of each function. This implementation demonstrates the use of functions, conditional logic, and data visualization in R.

2 Nested Simulation: Multi-Sales & Discounts

library(knitr)
library(kableExtra)
library(dplyr)

# =========================
# FUNCTION
# Simulate daily sales for several salespersons.
#
# Args:
#   n_salesperson: number of salespersons to simulate.
#   days:          number of days simulated for each salesperson.
#
# Returns:
#   A data frame with one row per salesperson/day and the columns
#   salesperson, day, sales, discount and final_sales. When either count
#   is 0 the result has those columns and zero rows.
simulate_sales <- function(n_salesperson, days) {
  # Pre-size every column. Growing a data frame with rbind() inside the loop
  # re-copies all earlier rows on each iteration.
  n_rows <- n_salesperson * days
  salesperson <- integer(n_rows)
  day <- integer(n_rows)
  sales <- integer(n_rows)
  discount <- numeric(n_rows)
  final_sales <- numeric(n_rows)

  row <- 0L
  # seq_len() iterates zero times for a count of 0, whereas 1:0 would
  # iterate over 1 and 0.
  for (sp in seq_len(n_salesperson)) {
    for (d in seq_len(days)) {
      row <- row + 1L

      # One random sale amount between 50 and 200 per salesperson/day.
      amount <- sample(50:200, 1)

      # Sales above 150 get a 10% discount, everything else 5%.
      if (amount > 150) {
        rate <- 0.1
      } else {
        rate <- 0.05
      }

      salesperson[row] <- sp
      day[row] <- d
      sales[row] <- amount
      discount[row] <- rate
      final_sales[row] <- amount - (amount * rate)
    }
  }

  data.frame(
    salesperson = salesperson,
    day = day,
    sales = sales,
    discount = discount,
    final_sales = final_sales
  )
}

# =========================
# GENERATE DATA
# Fix the RNG seed so the simulated figures are the same on every run.
set.seed(2024)
df_sales <- simulate_sales(5, 10)

# SUMMARY: total discounted sales for each salesperson
summary_sales <- df_sales %>%
  group_by(salesperson) %>%
  summarise(total_sales = sum(final_sales))

# Upper-case the column headers for display
colnames(df_sales) <- toupper(colnames(df_sales))
colnames(summary_sales) <- toupper(colnames(summary_sales))

# =========================
# TABLE 1 (large, styled, with a prominent title)
# Keep the preview in a variable so the body-row styling below covers
# exactly the rows that are shown instead of assuming there are six.
sales_preview <- head(df_sales)

kable(sales_preview,
      caption = "<span style='font-size:18px; font-weight:bold;'>Sample of Simulated Sales Data</span>",
      align = "c", escape = FALSE) %>%
  kable_styling(full_width = TRUE,
                position = "center",
                font_size = 16,
                bootstrap_options = c("striped")) %>%
  row_spec(0, bold = TRUE, color = "black", background = "#B7E4C7") %>%
  row_spec(seq_len(nrow(sales_preview)), color = "black", background = "#F1FAEE")

Sample of Simulated Sales Data
SALESPERSON	DAY	SALES	DISCOUNT	FINAL_SALES
1	1	72	0.05	68.40
1	2	140	0.05	133.00
1	3	183	0.10	164.70
1	4	139	0.05	132.05
1	5	98	0.05	93.10
1	6	177	0.10	159.30

# =========================
# TABLE 2 (large, styled, with a prominent title)
# seq_len() selects no body rows for an empty summary, whereas
# 1:nrow(...) would count down to 0 and restyle the header row.
kable(summary_sales,
      caption = "<span style='font-size:18px; font-weight:bold;'>Total Sales by Each Salesperson</span>",
      align = "c", escape = FALSE) %>%
  kable_styling(full_width = TRUE,
                position = "center",
                font_size = 16,
                bootstrap_options = c("striped")) %>%
  row_spec(0, bold = TRUE, color = "black", background = "#A2D2FF") %>%
  row_spec(seq_len(nrow(summary_sales)), color = "black", background = "#EDF6F9")

Total Sales by Each Salesperson
SALESPERSON	TOTAL_SALES
1	1259.85
2	1061.40
3	1328.55
4	1187.10
5	1300.90

# =========================
# PLOT (tidy and proportional)
par(mar = c(4, 4, 2, 1), cex.axis = 0.9, cex.lab = 1, cex.main = 1.1)

# One borderless bar per salesperson, labelled with the salesperson id.
barplot(
  summary_sales$TOTAL_SALES,
  names.arg = summary_sales$SALESPERSON,
  col = c("#A0C4FF", "#BDB2FF", "#FFC6FF", "#FFD6A5", "#CAFFBF"),
  border = NA,
  main = "Total Sales per Salesperson",
  xlab = "Salesperson",
  ylab = "Total Sales"
)

2.1 Explanation

This task simulates sales data for multiple salespersons across several days using nested loops. The outer loop represents each salesperson, while the inner loop represents daily sales. Conditional logic is applied to determine discount rates based on sales performance. The generated data is summarized to calculate total sales per salesperson. The results are presented in well-formatted tables and visualized using a bar chart with improved aesthetics.

3 Multi-Level Performance Categorization

# ======================
# LIBRARY
# ======================
library(kableExtra)

# ======================
# DATA
# ======================
# Seeded so the same 50 sales amounts are drawn on every run.
set.seed(1)
sales_amount <- sample(50:200, size = 50, replace = TRUE)

# ======================
# FUNCTION
# ======================
# Map a single sales amount to a performance label.
#
# Args:
#   x: one numeric sales amount.
#
# Returns:
#   "Poor" below 80, "Average" from 80, "Good" from 120,
#   "Very Good" from 150 and "Excellent" from 180 upwards.
categorize_performance <- function(x) {
  # Thresholds are checked from the lowest band upwards.
  if (x < 80) {
    "Poor"
  } else if (x < 120) {
    "Average"
  } else if (x < 150) {
    "Good"
  } else if (x < 180) {
    "Very Good"
  } else {
    "Excellent"
  }
}

# ======================
# APPLY
# ======================
# vapply() always returns a character vector, even for empty input,
# whereas sapply() would return an empty list in that case.
category <- vapply(sales_amount, categorize_performance, character(1))

# Number of sales amounts that fall into each category.
category_count <- table(category)

# ======================
# TABLE (tidy, soft colours)
# ======================
# Flatten the frequency table into a two-column data frame.
category_df <- data.frame(
  Category = names(category_count),
  Count = as.numeric(category_count)
)

category_df %>%
  kable(align = "c") %>%
  kable_styling(full_width = TRUE) %>%  # full width keeps the table from rendering small
  row_spec(0, bold = TRUE, background = "#FFE5EC", color = "black")

Category	Count
Average	16
Excellent	3
Good	8
Poor	8
Very Good	15

# ======================
# PLOT
# ======================
# One colour per category, shared by the bar chart, the pie chart and the
# legend.
category_colors <- c("#FFADAD", "#FFD6A5", "#FDFFB6", "#CAFFBF", "#A0C4FF")

# Two panels stacked vertically.
par(mfrow = c(2, 1))

# BAR CHART
par(mar = c(4, 4, 2, 2))
barplot(
  category_count,
  col = category_colors,
  main = "Performance Count",
  xlab = "Category",
  ylab = "Count",
  cex.main = 1.1,
  cex.lab = 0.9,
  cex.names = 0.8
)

# PIE CHART (slice labels are suppressed; the legend carries them instead)
par(mar = c(4, 4, 2, 6))
pie(
  category_count,
  labels = NA,
  col = category_colors,
  main = "Performance Distribution",
  radius = 1,
  cex.main = 1.1
)

legend(
  "topright",
  legend = paste(names(category_count), "(", category_count, ")"),
  fill = category_colors,
  cex = 0.8,
  bty = "n"
)

3.1 Explanation

This task categorizes sales performance using a custom function with conditional logic. Each sales value is classified into one of five categories (Excellent, Very Good, Good, Average, or Poor) based on predefined thresholds. The categorization function is applied to every value in the sales data, and the results are then tabulated to count how many sales fall into each performance category. The output includes a summary table of counts per category and visualizations using both a bar chart and a pie chart to clearly illustrate the distribution of performance levels.

4 Multi-Company Dataset Simulation

library(knitr)
library(kableExtra)
library(dplyr)

# =========================
# FUNCTION
# Simulate employee records for several companies.
#
# Args:
#   n_company:   number of companies to generate.
#   n_employees: number of employees generated for each company.
#
# Returns:
#   A data frame with one row per employee and the columns COMPANY_ID,
#   EMPLOYEE_ID, SALARY, DEPARTMENT, PERFORMANCE_SCORE, KPI_SCORE and STATUS.
#   When either count is 0 the result has those columns and zero rows.
generate_company_data <- function(n_company, n_employees) {
  departments <- c("HR", "Finance", "IT", "Marketing")

  # Pre-size every column. Growing a data frame with rbind() inside the loop
  # re-copies all earlier rows on each iteration.
  n_rows <- n_company * n_employees
  company_id <- integer(n_rows)
  employee_id <- integer(n_rows)
  salary <- integer(n_rows)
  dept <- character(n_rows)
  performance <- integer(n_rows)
  kpi <- integer(n_rows)
  status <- character(n_rows)

  row <- 0L
  # seq_len() iterates zero times for a count of 0 (1:0 would not), and the
  # loop variables no longer shadow base::c().
  for (comp in seq_len(n_company)) {
    for (emp in seq_len(n_employees)) {
      row <- row + 1L

      company_id[row] <- comp
      employee_id[row] <- emp

      # Draw order (salary, performance, KPI, department) is kept unchanged
      # so a given seed still yields the same records.
      salary[row] <- sample(3000:10000, 1)
      performance[row] <- sample(60:100, 1)
      kpi[row] <- sample(50:100, 1)
      dept[row] <- sample(departments, 1)

      # conditional: top performer (KPI strictly above 90)
      if (kpi[row] > 90) {
        status[row] <- "Top Performer"
      } else {
        status[row] <- "Normal"
      }
    }
  }

  data.frame(
    COMPANY_ID = company_id,
    EMPLOYEE_ID = employee_id,
    SALARY = salary,
    DEPARTMENT = dept,
    PERFORMANCE_SCORE = performance,
    KPI_SCORE = kpi,
    STATUS = status,
    stringsAsFactors = FALSE
  )
}

# =========================
# GENERATE DATA
df_company <- generate_company_data(5, 20)

# =========================
# SAMPLE TABLE
# Keep the preview in a variable so the body-row styling below covers
# exactly the rows that are shown instead of assuming there are six.
company_preview <- head(df_company)

kable(company_preview,
      caption = "<span style='font-size:18px; font-weight:bold;'>Sample Company Data</span>",
      align = "c", escape = FALSE) %>%
  kable_styling(full_width = TRUE,
                font_size = 14,
                bootstrap_options = c("striped")) %>%
  row_spec(0, bold = TRUE, background = "#CDEAC0") %>%
  row_spec(seq_len(nrow(company_preview)), background = "#F1FAEE")

Sample Company Data
COMPANY_ID	EMPLOYEE_ID	SALARY	DEPARTMENT	PERFORMANCE_SCORE	KPI_SCORE	STATUS
1	1	9932	Finance	70	69	Normal
1	2	7845	Finance	67	81	Normal
1	3	4484	Marketing	64	95	Top Performer
1	4	5469	IT	69	57	Normal
1	5	8680	Finance	62	53	Normal
1	6	8948	HR	86	90	Normal

# =========================
# SUMMARY PER COMPANY
# Mean salary, mean performance score and highest KPI score per company.
summary_company <- df_company %>%
  group_by(COMPANY_ID) %>%
  summarise(
    AVG_SALARY = mean(SALARY),
    AVG_PERFORMANCE = mean(PERFORMANCE_SCORE),
    MAX_KPI = max(KPI_SCORE)
  )

# =========================
# SUMMARY TABLE
# seq_len() selects no body rows for an empty summary, whereas
# 1:nrow(...) would count down to 0 and restyle the header row.
kable(summary_company,
      caption = "<span style='font-size:18px; font-weight:bold;'>Company Summary</span>",
      align = "c", escape = FALSE) %>%
  kable_styling(full_width = TRUE,
                font_size = 14,
                bootstrap_options = c("striped")) %>%
  row_spec(0, bold = TRUE, background = "#BDE0FE") %>%
  row_spec(seq_len(nrow(summary_company)), background = "#EDF6F9")

Company Summary
COMPANY_ID	AVG_SALARY	AVG_PERFORMANCE	MAX_KPI
1	6592.45	79.15	100
2	6038.10	80.15	97
3	7215.20	82.15	100
4	6427.75	81.10	100
5	6671.20	80.45	100

# =========================
# PLOTS: two panels side by side
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))

# Left panel: average salary per company
barplot(
  summary_company$AVG_SALARY,
  names.arg = summary_company$COMPANY_ID,
  col = c("#A0C4FF", "#BDB2FF", "#FFC6FF", "#FFD6A5", "#CAFFBF"),
  main = "Average Salary per Company",
  xlab = "Company",
  ylab = "Salary"
)

# Right panel: average performance score per company
barplot(
  summary_company$AVG_PERFORMANCE,
  names.arg = summary_company$COMPANY_ID,
  col = c("#FFADAD", "#FFD6A5", "#FDFFB6", "#CAFFBF", "#A0C4FF"),
  main = "Average Performance per Company",
  xlab = "Company",
  ylab = "Performance Score"
)

4.1 Explanation

This task generates a multi-company dataset using nested loops, where the outer loop represents companies and the inner loop represents employees. Each employee is assigned attributes such as salary, department, performance score, and KPI score. Conditional logic is applied to classify employees as “Top Performer” if their KPI score exceeds 90. The data is summarized per company to calculate average salary, average performance, and maximum KPI score. The results are presented in tables and visualized using bar charts.

5 Monte Carlo Simulation: Pi & Probability

Estimated Pi: 3.2
Probability (center square): 0.265

5.1 Explanation

This task uses Monte Carlo simulation to estimate the value of Pi by generating random points inside a square. Points that fall inside the unit circle are counted to approximate Pi. Additionally, the probability of points falling within a smaller central square is calculated. The visualization shows the distribution of points inside and outside the circle.

6 Advanced Data Transformation & Feature Engineering

library(knitr)
library(kableExtra)

# =========================
# DATA
# Start from the company dataset generated in section 4, bound to a new
# name `df` that the steps of this section operate on.
df <- df_company

# =========================
# FUNCTION NORMALIZATION (LOOP BASED)
# Min-max normalise every numeric column of a data frame.
#
# Args:
#   df: a data frame.
#
# Returns:
#   `df` with an extra `<name>_NORM` column for each numeric column, holding
#   (value - min) / (max - min). A column whose values are all equal has no
#   range to scale by, so its `_NORM` column is set to 0 instead of NaN.
normalize_columns <- function(df) {
  # names(df) is evaluated once, so the columns added below are not revisited.
  for (col in names(df)) {
    values <- df[[col]]
    if (is.numeric(values)) {
      lowest <- min(values)
      span <- max(values) - lowest

      # isTRUE() keeps an NA span (column containing NA) on the regular path,
      # where the NA propagates as before.
      if (isTRUE(span == 0)) {
        df[[paste0(col, "_NORM")]] <- rep(0, length(values))
      } else {
        df[[paste0(col, "_NORM")]] <- (values - lowest) / span
      }
    }
  }
  df
}

# =========================
# FUNCTION Z-SCORE
# Standardise every numeric column of a data frame.
#
# Args:
#   df: a data frame.
#
# Returns:
#   `df` with an extra `<name>_Z` column for each numeric column, holding
#   (value - mean) / sd. A column whose standard deviation is 0 cannot be
#   scaled, so its `_Z` column is set to 0 instead of NaN.
z_score <- function(df) {
  # names(df) is evaluated once, so the columns added below are not revisited.
  for (col in names(df)) {
    values <- df[[col]]
    if (is.numeric(values)) {
      spread <- sd(values)

      # isTRUE() keeps an NA spread (NA values or a single row) on the
      # regular path, where the NA propagates as before.
      if (isTRUE(spread == 0)) {
        df[[paste0(col, "_Z")]] <- rep(0, length(values))
      } else {
        df[[paste0(col, "_Z")]] <- (values - mean(values)) / spread
      }
    }
  }
  df
}

# =========================
# APPLY FUNCTIONS
# Both transforms are computed from the original columns. Passing the
# already-normalised frame to z_score() would also standardise every *_NORM
# column, adding *_NORM_Z columns whose values just repeat the *_Z ones.
source_cols <- names(df)
df_z <- z_score(df)
df <- normalize_columns(df)
df <- cbind(df, df_z[setdiff(names(df_z), source_cols)])

# =========================
# NEW FEATURES
# Salaries above 7000 are "High", the rest "Low".
df$SALARY_BRACKET <- ifelse(df$SALARY > 7000, "High", "Low")

# Performance scores above 85 are "High", the rest "Low".
df$PERFORMANCE_CATEGORY <- ifelse(df$PERFORMANCE_SCORE > 85, "High", "Low")

# =========================
# SAMPLE TABLE
# Keep the preview in a variable so the body-row styling below covers
# exactly the rows that are shown instead of assuming there are six.
transformed_preview <- head(df)

kable(transformed_preview,
      caption = "<span style='font-size:18px; font-weight:bold;'>Transformed Data Sample</span>",
      align = "c", escape = FALSE) %>%
  kable_styling(full_width = TRUE,
                font_size = 14,
                bootstrap_options = c("striped")) %>%
  row_spec(0, bold = TRUE, background = "#CDB4DB") %>%
  row_spec(seq_len(nrow(transformed_preview)), background = "#F3E8FF")

Transformed Data Sample
COMPANY_ID	EMPLOYEE_ID	SALARY	DEPARTMENT	PERFORMANCE_SCORE	KPI_SCORE	STATUS	EMPLOYEE_ID_NORM	SALARY_NORM	PERFORMANCE_SCORE_NORM	KPI_SCORE_NORM	COMPANY_ID_Z	EMPLOYEE_ID_Z	SALARY_Z	PERFORMANCE_SCORE_Z	KPI_SCORE_Z	COMPANY_ID_NORM_Z	EMPLOYEE_ID_NORM_Z	SALARY_NORM_Z	PERFORMANCE_SCORE_NORM_Z	KPI_SCORE_NORM_Z	SALARY_BRACKET	PERFORMANCE_CATEGORY
1	1	9932	Finance	70	69	Normal	0.0000000	0.9965222	0.250	0.38	-1.407125	-1.6392507	1.6135348	-0.8912456	-0.4022936	-1.407125	-1.6392507	1.6135348	-0.8912456	-0.4022936	High	Low
1	2	7845	Finance	67	81	Normal	0.0526316	0.6941023	0.175	0.62	-1.407125	-1.4666980	0.6062399	-1.1434850	0.4215158	-1.407125	-1.4666980	0.6062399	-1.1434850	0.4215158	High	Low
1	3	4484	Marketing	64	95	Top Performer	0.1052632	0.2070714	0.100	0.90	-1.407125	-1.2941453	-1.0159536	-1.3957243	1.3826268	-1.407125	-1.2941453	-1.0159536	-1.3957243	1.3826268	Low	Low
1	4	5469	IT	69	57	Normal	0.1578947	0.3498044	0.225	0.14	-1.407125	-1.1215926	-0.5405413	-0.9753254	-1.2261030	-1.407125	-1.1215926	-0.5405413	-0.9753254	-1.2261030	Low	Low
1	5	8680	Finance	62	53	Normal	0.2105263	0.8150993	0.050	0.06	-1.407125	-0.9490399	1.0092544	-1.5638838	-1.5007061	-1.407125	-0.9490399	1.0092544	-1.5638838	-1.5007061	High	Low
1	6	8948	HR	86	90	Normal	0.2631579	0.8539342	0.650	0.80	-1.407125	-0.7764872	1.1386051	0.4540308	1.0393729	-1.407125	-0.7764872	1.1386051	0.4540308	1.0393729	High	High

# =========================
# VISUALISATION
par(mar = c(4, 4, 2, 1), cex.main = 1, cex.lab = 0.9, cex.axis = 0.8)

# HISTOGRAM of the raw salary values
hist(
  df$SALARY,
  col = "#FFADAD",
  main = "Salary (Before)",
  xlab = "Salary"
)

# BOXPLOT of the min-max normalised salary values
boxplot(
  df$SALARY_NORM,
  col = "#A0C4FF",
  main = "Salary (Normalized)",
  ylab = "Normalized Value"
)

6.1 Explanation

This task applies data transformation using normalization and z-score techniques through custom functions. Loop-based normalization is used to transform all numeric columns. Additional features such as salary bracket and performance category are created for better classification. The comparison between original and transformed data is visualized using histograms and boxplots.

7 Mini Project: Company KPI Dashboard & Simulation

library(knitr)
library(kableExtra)
library(dplyr)

# =========================
# GENERATE DATASET
# Simulate the KPI dataset: employee records for several companies.
#
# Args:
#   n_company:  number of companies to generate.
#   n_employee: number of employees generated for each company.
#
# Returns:
#   A data frame with one row per employee and the columns COMPANY_ID,
#   EMPLOYEE_ID, SALARY, PERFORMANCE_SCORE, KPI_SCORE and DEPARTMENT.
#   When either count is 0 the result has those columns and zero rows.
generate_data <- function(n_company, n_employee) {
  departments <- c("HR", "IT", "Finance", "Marketing")

  # Pre-size every column. Growing a data frame with rbind() inside the loop
  # re-copies all earlier rows on each iteration.
  n_rows <- n_company * n_employee
  company_id <- integer(n_rows)
  employee_id <- integer(n_rows)
  salary <- integer(n_rows)
  performance <- integer(n_rows)
  kpi <- integer(n_rows)
  dept <- character(n_rows)

  row <- 0L
  # seq_len() iterates zero times for a count of 0 (1:0 would not), and the
  # loop variables no longer shadow base::c().
  for (comp in seq_len(n_company)) {
    for (emp in seq_len(n_employee)) {
      row <- row + 1L

      company_id[row] <- comp
      employee_id[row] <- emp

      # Draw order (salary, performance, KPI, department) is kept unchanged
      # so a given seed still yields the same records.
      salary[row] <- sample(3000:10000, 1)
      performance[row] <- sample(60:100, 1)
      kpi[row] <- sample(50:100, 1)
      dept[row] <- sample(departments, 1)
    }
  }

  data.frame(
    COMPANY_ID = company_id,
    EMPLOYEE_ID = employee_id,
    SALARY = salary,
    PERFORMANCE_SCORE = performance,
    KPI_SCORE = kpi,
    DEPARTMENT = dept,
    stringsAsFactors = FALSE
  )
}

df <- generate_data(5, 50)

# =========================
# KPI CATEGORY (LOOP)
# Pre-size the result and fill it in place. Appending with c() re-copies the
# vector on every iteration, and it leaves NULL for an empty dataset, in
# which case the KPI_CATEGORY column would silently not be created.
kpi_cat <- character(nrow(df))
for (i in seq_along(kpi_cat)) {
  k <- df$KPI_SCORE[i]
  if (k >= 90) {
    kpi_cat[i] <- "Top"
  } else if (k >= 75) {
    kpi_cat[i] <- "Medium"
  } else {
    kpi_cat[i] <- "Low"
  }
}
df$KPI_CATEGORY <- kpi_cat

# =========================
# SUMMARY
# Per company: mean salary, mean KPI score and the number of employees
# whose KPI score is at least 90.
summary_company <- summarise(
  group_by(df, COMPANY_ID),
  AVG_SALARY = mean(SALARY),
  AVG_KPI = mean(KPI_SCORE),
  TOP_PERFORMERS = sum(KPI_SCORE >= 90)
)

# =========================
# TABLE (nicer styling)
# Keep the preview in a variable so the body-row styling below covers
# exactly the rows that are shown instead of assuming there are six.
kpi_preview <- head(df)

kable(kpi_preview,
      caption="<span style='font-size:20px; font-weight:bold;'>Sample KPI Dataset</span>",
      align="c", escape=FALSE) %>%
  kable_styling(full_width=TRUE, font_size=15) %>%
  row_spec(0, bold=TRUE, color="white", background="#6D597A") %>%
  row_spec(seq_len(nrow(kpi_preview)), background="#F2E9E4")

Sample KPI Dataset
COMPANY_ID	EMPLOYEE_ID	SALARY	PERFORMANCE_SCORE	KPI_SCORE	DEPARTMENT	KPI_CATEGORY
1	1	5272	68	60	HR	Low
1	2	3353	70	74	HR	Low
1	3	9461	73	99	Finance	Top
1	4	8075	99	73	HR	Low
1	5	7700	69	98	Marketing	Top
1	6	8062	81	93	Finance	Top

# seq_len() selects no body rows for an empty summary, whereas
# 1:nrow(...) would count down to 0 and restyle the header row.
kable(summary_company,
      caption="<span style='font-size:20px; font-weight:bold;'>Company KPI Summary</span>",
      align="c", escape=FALSE) %>%
  kable_styling(full_width=TRUE, font_size=15) %>%
  row_spec(0, bold=TRUE, color="white", background="#355070") %>%
  row_spec(seq_len(nrow(summary_company)), background="#E3D5CA")

Company KPI Summary
COMPANY_ID	AVG_SALARY	AVG_KPI	TOP_PERFORMERS
1	6724.70	76.66	11
2	6281.74	72.64	6
3	6348.42	75.66	13
4	6049.06	75.12	11
5	6725.80	75.78	8

# =========================
# PLOT (compact, stacked top and bottom, regular-sized titles)
par(
  mfrow = c(2, 1),
  mar = c(4, 4, 2.5, 1),
  cex.main = 0.9,
  cex.lab = 0.8,
  cex.axis = 0.8
)

# BAR CHART: mean KPI score for each company
barplot(
  summary_company$AVG_KPI,
  names.arg = summary_company$COMPANY_ID,
  col = c("#A0C4FF", "#BDB2FF", "#FFC6FF", "#FFD6A5", "#CAFFBF"),
  main = "Average KPI per Company",
  xlab = "Company",
  ylab = "KPI"
)

# SCATTER: salary against KPI score, one point per employee
plot(
  df$SALARY, df$KPI_SCORE,
  col = "#90DBF4",
  pch = 16,
  cex = 0.7,
  main = "Salary vs KPI",
  xlab = "Salary",
  ylab = "KPI Score"
)

7.1 Explanation

This mini project generates a dataset for multiple companies and employees. Each employee is assigned attributes such as salary, performance score, KPI score, and department. A loop is used to categorize employees into KPI tiers (Top, Medium, Low). The data is summarized per company to calculate average salary, average KPI, and number of top performers. The results are visualized using bar charts and scatter plots to analyze performance patterns.

8 Automated Report Generation

8.1 Company Summary Report

Company 1

Average Salary: 6724.70
Average KPI: 76.66
Top Performers: 11

Company 2

Average Salary: 6281.74
Average KPI: 72.64
Top Performers: 6

Company 3

Average Salary: 6348.42
Average KPI: 75.66
Top Performers: 13

8.2 Explanation

This task implements an automated report generation using functions and loops. The function processes the dataset and produces summary statistics for each company. For every company, the report includes average salary, average KPI score, and the number of top performers. The use of loops allows the report to be generated dynamically for multiple companies without manual repetition. The results are presented in a structured format to improve readability and provide clear insights into company performance.