Practicum: Functions & Loops
Exercise Week 5
library(htmltools)
HTML('
<div class="profile-card">
<div>
<img src="Almetcokkk.JPG">
</div>
<div class="profile-name">Veronica Maria Lucia F Xavier</div>
<div class="divider"></div>
<div class="profile-nim">NIM: 52250021</div>
</div>
')

1 Task: Dynamic Multi-Formula Function
In this task, we use Functions and Loops in R to make calculations faster and easier. Instead of writing the same math formula many times, we create a function that can do it for us automatically. We also use loops so the computer can process many numbers at once without us having to do it one by one
# Main function: evaluate one of several predefined formulas over a vector.
#
# Args:
#   x_vals:  Numeric vector of x values.
#   formula: Single string naming the formula to apply:
#              "linear"      -> 2x + 1
#              "quadratic"   -> x^2 - 3x + 2
#              "cubic"       -> x^3 - 2x^2 + x
#              "exponential" -> exp(0.3x)
#
# Returns:
#   A plain (unnamed) numeric vector with one result per element of x_vals.
#
# Raises:
#   An error if `formula` is not exactly one of the supported names.
compute_formula <- function(x_vals, formula) {
  # Validate formula input. The length check keeps an empty or vector-valued
  # `formula` from reaching the scalar dispatch below.
  valid_formulas <- c("linear", "quadratic", "cubic", "exponential")
  if (length(formula) != 1L || !formula %in% valid_formulas) {
    stop(paste("Invalid formula! Choose one of:",
               paste(valid_formulas, collapse = ", ")))
  }

  # Arithmetic in R is vectorised, so the chosen formula is applied to the
  # whole input at once instead of element by element.
  results <- switch(formula,
    linear      = 2 * x_vals + 1,
    quadratic   = x_vals^2 - 3 * x_vals + 2,
    cubic       = x_vals^3 - 2 * x_vals^2 + x_vals,
    exponential = exp(0.3 * x_vals)
  )

  # Drop names/dimensions so callers always get a bare vector
  as.vector(results)
}
# Evaluate every formula on x = 1..20 and stack the results in long format
x_vals <- 1:20
formulas <- c("linear", "quadratic", "cubic", "exponential")

# One data frame per formula (x, y, formula name), bound together in one step
per_formula <- lapply(formulas, function(f) {
  data.frame(x = x_vals, y = compute_formula(x_vals, f), formula = f)
})
all_results <- do.call(rbind, per_formula)
# Draw the four formula curves on shared axes, one colour per formula
ggplot(all_results, aes(x = x, y = y, color = formula)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 1.5) +
  labs(
    title = "Task 1: Mathematical Formula Comparison",
    x = "X Value",
    y = "Y Value",
    color = "Formula"
  ) +
  theme_minimal() +
  scale_color_brewer(palette = "Set1")

2 Task: Nested Simulation - Multi-Sales & Discounts
This task aims to simulate sales activities using R by combining a helper function with nested loops. The model represents multiple salespersons over a certain number of days, where each sale is assigned a discount rate based on the sales amount. By running this simulation, we can better understand how sales performance, discounts, and cumulative results change over time.
# 1. Discount tiers: map a single sale amount to its discount rate.
#
# Args:
#   sales_amount: One numeric sale amount (scalar; it is used in `if`).
#
# Returns:
#   0.20 above 10000, 0.15 above 7000, 0.10 above 4000, otherwise 0.05.
apply_discount <- function(sales_amount) {
  # Tiers are checked from the highest threshold down; the first match wins
  if (sales_amount > 10000) {
    return(0.20)
  }
  if (sales_amount > 7000) {
    return(0.15)
  }
  if (sales_amount > 4000) {
    return(0.10)
  }
  0.05
}
# 2. Simulation engine: daily sales for several salespeople.
#
# Args:
#   n_salesperson: Number of salespeople to simulate.
#   days:          Number of days simulated for each salesperson.
#
# Returns:
#   A data frame with one row per salesperson and day and the columns
#   sales_id, day, sales_amount, discount_rate, net_sales and
#   cumulative_sales (running total of sales_amount per salesperson).
#   Zero salespeople or zero days give a zero-row data frame.
#
# Note: set.seed(42) is called on every run, which resets the global RNG.
simulate_sales <- function(n_salesperson, days) {
  set.seed(42) # Ensures the same results every time

  # seq_len() is empty for 0, whereas 1:0 would iterate over 1 and 0
  per_person <- lapply(seq_len(n_salesperson), function(s) {
    # Random sales between 1,000 and 15,000. One vectorised draw per person
    # consumes the random stream in the same salesperson-then-day order as
    # drawing one value at a time.
    amount <- round(runif(days, min = 1000, max = 15000), 2)
    discount <- vapply(amount, apply_discount, numeric(1))

    # Running total in plain double arithmetic, added day by day
    cumulative <- numeric(days)
    running <- 0
    for (d in seq_len(days)) {
      running <- running + amount[d]
      cumulative[d] <- running
    }

    data.frame(
      sales_id = rep(s, days),
      day = seq_len(days),
      sales_amount = amount,
      discount_rate = discount,
      net_sales = amount * (1 - discount),
      cumulative_sales = cumulative
    )
  })

  # No salespeople: return the empty table with the usual columns
  if (length(per_person) == 0) {
    return(data.frame(
      sales_id = integer(0),
      day = integer(0),
      sales_amount = numeric(0),
      discount_rate = numeric(0),
      net_sales = numeric(0),
      cumulative_sales = numeric(0)
    ))
  }

  # Bind once at the end instead of growing the data frame row by row
  do.call(rbind, per_person)
}
# 3. Run the simulation: 5 salespeople over 10 days
df_sales <- simulate_sales(n_salesperson = 5, days = 10)

# 4. Per-salesperson summary: total and mean sales, mean discount (in %),
#    and the final value of the running total
summary_performance <- df_sales %>%
  group_by(sales_id) %>%
  summarise(
    Total_Revenue = sum(sales_amount),
    Average_Sales = mean(sales_amount),
    Avg_Discount_Pct = mean(discount_rate) * 100,
    Final_Cumulative = max(cumulative_sales),
    .groups = "drop"
  )
print(summary_performance)
## # A tibble: 5 × 5
## sales_id Total_Revenue Average_Sales Avg_Discount_Pct Final_Cumulative
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 99077. 9908. 16.5 99077.
## 2 2 92604. 9260. 15.5 92604.
## 3 3 96154. 9615. 15 96154.
## 4 4 82690. 8269. 14 82690.
## 5 5 98664. 9866. 15.5 98664.
# Running total of sales per salesperson across the simulated days
ggplot(
  df_sales,
  aes(
    x = day,
    y = cumulative_sales,
    color = factor(sales_id),
    group = sales_id
  )
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    title = "Cumulative Sales Performance",
    subtitle = "Tracking 10-day revenue growth per salesperson",
    x = "Simulation Day",
    y = "Total Sales (Currency)",
    color = "Staff ID"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

3 Task: Multi-Level Performance Categorization
This task focuses on classifying sales performance into different levels based on sales volume. By using a function, each sales value is grouped into categories such as Excellent, Very Good, Good, Average, and Poor. This helps to clearly understand the distribution of performance and makes it easier to analyze overall sales effectiveness.
library(tidyr)
# Label every sale with a performance tier. case_when() keeps the first
# matching condition, so thresholds are listed from highest to lowest; the
# factor levels fix the tier order from best to worst.
df_sales <- df_sales %>%
  mutate(
    performance = factor(
      case_when(
        sales_amount >= 12000 ~ "Excellent",
        sales_amount >= 9000 ~ "Very Good",
        sales_amount >= 6000 ~ "Good",
        sales_amount >= 3000 ~ "Average",
        TRUE ~ "Poor"
      ),
      levels = c("Excellent", "Very Good", "Good", "Average", "Poor")
    )
  )

# Share of sales in each tier, plus a two-line "tier / percent" chart label
perf_summary <- df_sales %>%
  count(performance) %>%
  mutate(
    percentage = n / sum(n),
    label = paste0(
      performance, "\n",
      scales::percent(percentage, accuracy = 0.1)
    )
  )
# Pie chart: a single stacked bar wrapped into polar coordinates, with each
# slice labelled at its midpoint
ggplot(perf_summary, aes(x = "", y = percentage, fill = performance)) +
  geom_bar(stat = "identity", width = 1, color = "white", linewidth = 0.8) +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 4,
    fontface = "bold",
    color = "gray10"
  ) +
  scale_fill_brewer(palette = "RdYlGn", direction = -1) +
  labs(
    title = "Performance Category Distribution",
    subtitle = "Analysis based on total sales volume",
    caption = "Source: Internal Sales Data"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray30"),
    legend.position = "none"
  )

4 Task: Simulate Dataset Multi-Company
This task simulates a multi-company employee dataset using nested loops and conditional logic. A total of 4 companies are generated, each with 20 employees. Each employee has attributes including salary, department, performance score, and KPI score. Employees with a KPI score above 90 are flagged as top performers. The output includes a summary table per company and visualizations of KPI and salary distributions.
library(dplyr)
library(ggplot2)
# Derive a KPI score from a single performance score.
#
# Args:
#   performance_score: One numeric performance score (scalar).
#
# Returns:
#   The score plus a random adjustment, rounded to 1 decimal and capped at
#   100. The adjustment is +U(5, 15) for scores >= 85, +U(0, 10) for scores
#   >= 70, and -U(0, 5) otherwise. Exactly one random number is drawn.
calculate_kpi <- function(performance_score) {
  adjustment <- if (performance_score >= 85) {
    runif(1, 5, 15)
  } else if (performance_score >= 70) {
    runif(1, 0, 10)
  } else {
    -runif(1, 0, 5)
  }

  # Cap at 100 after rounding
  min(round(performance_score + adjustment, 1), 100)
}
# Main function: generate an employee dataset for several companies.
#
# Args:
#   num_companies: Number of companies to generate.
#   num_employees: Number of employees generated per company.
#
# Returns:
#   A data frame with one row per employee and the columns company_id,
#   employee_id, salary, department, performance_score, kpi_score and
#   top_performer ("Yes" when kpi_score > 90, otherwise "No").
#   Zero companies or zero employees give a zero-row data frame.
#
# Note: set.seed(123) is called on every run, which resets the global RNG.
generate_company_data <- function(num_companies, num_employees) {
  set.seed(123)
  departments <- c("Finance", "Marketing", "Operations", "HR", "Technology")

  # Preallocate one slot per employee instead of growing a data frame
  rows <- vector("list", num_companies * num_employees)
  slot <- 0L

  # Outer loop: companies. seq_len() is empty for 0, unlike 1:0.
  for (p in seq_len(num_companies)) {
    # Inner loop: employees
    for (k in seq_len(num_employees)) {
      # The random draws stay in this order (performance, KPI, salary,
      # department) so the seeded dataset is reproduced exactly
      performance_score <- round(runif(1, min = 50, max = 100), 1)
      kpi_score <- calculate_kpi(performance_score)
      salary <- round(runif(1, min = 4000000, max = 20000000), 0)
      department <- sample(departments, 1)

      # Mark top performers
      top_performer <- if (kpi_score > 90) "Yes" else "No"

      slot <- slot + 1L
      rows[[slot]] <- data.frame(
        company_id = paste0("Company-", p),
        employee_id = paste0("E", p, "-", k),
        salary = salary,
        department = department,
        performance_score = performance_score,
        kpi_score = kpi_score,
        top_performer = top_performer
      )
    }
  }

  # Nothing generated: return the empty table with the usual columns
  if (length(rows) == 0) {
    return(data.frame(
      company_id = character(0),
      employee_id = character(0),
      salary = numeric(0),
      department = character(0),
      performance_score = numeric(0),
      kpi_score = numeric(0),
      top_performer = character(0)
    ))
  }

  do.call(rbind, rows)
}
# Generate dataset: 4 companies, 20 employees each
company_data <- generate_company_data(num_companies = 4, num_employees = 20)

# Per-company averages (rounded), the best KPI score, and the number of
# employees flagged as top performers
company_summary <- company_data %>%
  group_by(company_id) %>%
  summarise(
    avg_salary = round(mean(salary), 0),
    avg_performance = round(mean(performance_score), 1),
    avg_kpi = round(mean(kpi_score), 1),
    highest_kpi = max(kpi_score),
    total_top = sum(top_performer == "Yes"),
    .groups = "drop"
  )
print(company_summary)
## # A tibble: 4 × 6
## company_id avg_salary avg_performance avg_kpi highest_kpi total_top
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 Company-1 11838980 69.8 70.7 100 4
## 2 Company-2 11915675 73.3 75.6 100 4
## 3 Company-3 11504080 70.6 72.3 100 3
## 4 Company-4 10394888 77.4 80.1 100 10
# Visualization 1: average KPI score per company (bar chart)
ggplot(
  company_summary,
  aes(
    x = factor(company_id, levels = paste0("Company-", 1:4)),
    y = avg_kpi,
    fill = company_id
  )
) +
  # Create the bar chart
  geom_col(width = 0.6) +
  # Add value labels on top of bars (rounded to 1 decimal place)
  geom_text(
    aes(label = round(avg_kpi, 1)),
    vjust = -0.5, size = 4.5, fontface = "bold"
  ) +
  # Titles and axis labels
  labs(
    title = "Average KPI Score per Company",
    subtitle = "Performance comparison across companies",
    x = "Company",
    y = "Average KPI Score"
  ) +
  # Base theme and styling
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, color = "gray40"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    panel.grid.major.x = element_blank() # Clean up vertical grid lines
  ) +
  # Color palette
  scale_fill_brewer(palette = "Set2")

# Separate statement (previously fused onto the end of the plot call):
# store company_id as a factor with an explicit Company-1..Company-4 order
company_data$company_id <- factor(
  company_data$company_id,
  levels = c("Company-1", "Company-2", "Company-3", "Company-4")
)
# ================================
# Visualization 2: Salary Distribution
# ================================
# Boxplot per company with the individual salaries jittered on top and the
# median marked as a point
plot2 <- ggplot(
  company_data,
  aes(x = company_id, y = salary, fill = company_id)
) +
  geom_boxplot(width = 0.5, alpha = 0.7, outlier.color = "red") +
  geom_jitter(width = 0.15, alpha = 0.3, size = 1.5) +
  stat_summary(fun = median, geom = "point", size = 3) +
  labs(
    title = "Task 4: Salary Distribution per Company",
    subtitle = "Ordered by Company ID (Company-1 to Company-4)",
    x = "Company",
    y = "Salary (IDR)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 11)
  ) +
  scale_fill_brewer(palette = "Set2")
plot2

5 Task: Monte Carlo simulation
This task implements a Monte Carlo simulation to estimate the value of Pi using points generated uniformly at random inside the square [-1, 1] × [-1, 1]. A total of 2,000 random points are plotted, and the ratio of points falling inside the unit circle, multiplied by 4, is used to approximate Pi. Additionally, the probability of a random point landing inside a defined sub-square region (0 ≤ x ≤ 0.5 and 0 ≤ y ≤ 0.5) is calculated and visualized.
library(ggplot2)
# Estimate Pi by Monte Carlo sampling and print a short summary.
#
# Args:
#   num_points: Number of random points to draw.
#
# Returns:
#   A list with:
#     data           - data frame of the points (x, y) and their position
#                      label ("Inside" / "Outside" the unit circle)
#     pi_estimate    - 4 * (points inside the circle / num_points)
#     prob_subsquare - share of points with 0 <= x <= 0.5 and
#                      0 <= y <= 0.5, rounded to 4 decimals
#
# Note: set.seed(99) is called on every run, which resets the global RNG.
monte_carlo_pi <- function(num_points) {
  set.seed(99)

  # Uniform points in [-1, 1] x [-1, 1]; all x values are drawn first
  x_coord <- runif(num_points, -1, 1)
  y_coord <- runif(num_points, -1, 1)

  # A point is inside when its distance from the origin is at most 1
  is_inside <- sqrt(x_coord^2 + y_coord^2) <= 1
  position <- ifelse(is_inside, "Inside", "Outside")
  points_inside <- sum(position == "Inside")

  # Circle area / square area = pi / 4
  pi_estimate <- (points_inside / num_points) * 4

  # Sub-square probability (0 ≤ x ≤ 0.5 and 0 ≤ y ≤ 0.5)
  hits_subsquare <- sum(
    x_coord >= 0 & x_coord <= 0.5 &
      y_coord >= 0 & y_coord <= 0.5
  )
  prob_subsquare <- round(hits_subsquare / num_points, 4)

  # Console summary
  cat("========== Monte Carlo Simulation ==========\n")
  cat("Total Points :", num_points, "\n")
  cat("Points Inside :", points_inside, "\n")
  cat("Estimated Pi :", round(pi_estimate, 5), "\n")
  cat("Actual Pi :", round(pi, 5), "\n")
  cat("Sub-square Prob. :", prob_subsquare, "\n")
  cat("===========================================\n")

  list(
    data = data.frame(x = x_coord, y = y_coord, position = position),
    pi_estimate = pi_estimate,
    prob_subsquare = prob_subsquare
  )
}
# Run simulation
result <- monte_carlo_pi(num_points = 2000)
## ========== Monte Carlo Simulation ==========
## Total Points : 2000
## Points Inside : 1571
## Estimated Pi : 3.142
## Actual Pi : 3.14159
## Sub-square Prob. : 0.055
## ===========================================
monte_carlo_data <- result$data

# Create circle boundary: 300 points along the unit circle
circle_angle <- seq(0, 2 * pi, length.out = 300)
circle_path <- data.frame(
  cx = cos(circle_angle),
  cy = sin(circle_angle)
)

# Scatter plot of the sampled points (a separate statement; it was
# previously fused onto the closing parenthesis of circle_path)
ggplot(monte_carlo_data, aes(x = x, y = y, color = position)) +
  # Individual points
  geom_point(size = 1, alpha = 0.6) +
  # Circle boundary
  geom_path(
    data = circle_path, aes(x = cx, y = cy),
    color = "black", linewidth = 1, inherit.aes = FALSE
  ) +
  # Sub-square analysis box
  annotate(
    "rect",
    xmin = 0, xmax = 0.5, ymin = 0, ymax = 0.5,
    fill = NA, color = "purple", linewidth = 1, linetype = "dashed"
  ) +
  # Labels and titles
  labs(
    title = "Task 5: Monte Carlo Simulation - Pi Estimation",
    subtitle = "Purple dashed box = Sub-square for probability analysis",
    x = "X Coordinate",
    y = "Y Coordinate",
    color = "Point Position",
    # Use the estimate returned by the simulation rather than recomputing
    # it with a hard-coded point count
    caption = paste("Estimated Pi =", round(result$pi_estimate, 4))
  ) +
  # Custom colors (Blue for Inside, Red for Outside)
  scale_color_manual(values = c("Inside" = "#2196F3", "Outside" = "#F44336")) +
  # Equal axis scaling so the circle is not drawn as an ellipse
  coord_fixed() +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14))

6 Task: Data Transformation
This task demonstrates data transformation techniques using loop-based normalization. Two methods are applied: Min-Max normalization, which rescales values to a range of 0 to 1, and Z-Score standardization, which centers values around a mean of 0 with a standard deviation of 1. New features such as salary bracket and performance category are also engineered from the existing dataset. Distributions before and after transformation are compared using histograms and boxplots.
library(dplyr)
library(ggplot2)
# Min-Max Normalization (scale 0–1)
#
# Args:
#   df: Data frame; only its numeric columns are rescaled.
#
# Returns:
#   A copy of df in which every numeric column is replaced by
#   (x - min) / (max - min), rounded to 4 decimals. Missing values are
#   ignored when finding min/max and stay NA. A constant column (max == min)
#   is mapped to 0 instead of the NaN produced by dividing 0 by 0.
#   Non-numeric columns are returned unchanged.
minmax_normalization <- function(df) {
  result <- df
  for (col in names(df)) {
    values <- df[[col]]
    if (!is.numeric(values)) {
      next
    }

    min_val <- min(values, na.rm = TRUE)
    value_range <- max(values, na.rm = TRUE) - min_val

    if (isTRUE(value_range == 0)) {
      # No spread to rescale: every observed value sits at the minimum
      result[[col]] <- ifelse(is.na(values), NA_real_, 0)
    } else {
      result[[col]] <- round((values - min_val) / value_range, 4)
    }
  }
  result
}
# Z-Score Standardization (mean = 0, sd = 1)
#
# Args:
#   df: Data frame; only its numeric columns are standardised.
#
# Returns:
#   A copy of df in which every numeric column is replaced by
#   (x - mean) / sd, rounded to 4 decimals. Missing values are ignored when
#   computing mean/sd and stay NA. A column whose standard deviation is 0 is
#   mapped to 0 instead of the NaN produced by dividing 0 by 0.
#   Non-numeric columns are returned unchanged.
zscore_normalization <- function(df) {
  result <- df
  for (col in names(df)) {
    values <- df[[col]]
    if (!is.numeric(values)) {
      next
    }

    mean_val <- mean(values, na.rm = TRUE)
    sd_val <- sd(values, na.rm = TRUE)

    if (isTRUE(sd_val == 0)) {
      # No spread to standardise: every observed value equals the mean
      result[[col]] <- ifelse(is.na(values), NA_real_, 0)
    } else {
      result[[col]] <- round((values - mean_val) / sd_val, 4)
    }
  }
  result
}
# 3. DATA PREPARATION
# Keep only the three numeric columns that will be rescaled
numeric_data <- company_data[c("salary", "performance_score", "kpi_score")]

# Rescale them with both methods
data_minmax <- minmax_normalization(numeric_data)
data_zscore <- zscore_normalization(numeric_data)

# 4. FEATURE ENGINEERING
# Bucket salary and performance score; case_when() keeps the first match,
# so thresholds are listed from highest to lowest
company_data <- company_data %>%
  mutate(
    salary_category = case_when(
      salary >= 15000000 ~ "High",
      salary >= 9000000 ~ "Medium",
      TRUE ~ "Low"
    ),
    performance_category = case_when(
      performance_score >= 85 ~ "Excellent",
      performance_score >= 70 ~ "Good",
      TRUE ~ "Needs Improvement"
    )
  )

# 5. DATA COMBINATION FOR VISUALIZATION
# Stack raw and Min-Max-normalised values, tagged by status
before_data <- mutate(numeric_data, status = "Before Normalization")
after_data <- mutate(data_minmax, status = "After Min-Max")
combined_data <- rbind(before_data, after_data)
# 6. VISUALIZATION 1: SALARY DISTRIBUTION
# Histogram of salary before and after Min-Max normalization.
# Each status gets its own panel with an independent x-axis: raw salaries
# are in the millions while normalised values lie in [0, 1], so on a shared
# axis the normalised data would collapse into a single bar.
ggplot(combined_data, aes(x = salary, fill = status)) +
  geom_histogram(
    bins = 12,
    alpha = 0.5,
    position = "identity",
    color = "white"
  ) +
  facet_wrap(~status, scales = "free_x") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = "Salary Distribution Before and After Normalization",
    subtitle = "Comparison using Min-Max Normalization",
    x = "Salary Value",
    y = "Frequency",
    fill = "Data Status"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold"),
    panel.grid.major.x = element_blank()
  ) +
  scale_fill_manual(values = c(
    "Before Normalization" = "#EF9A9A",
    "After Min-Max" = "#90CAF9"
  ))
# 7. VISUALIZATION 2: KPI BOXPLOT
# Combine KPI data (before vs after normalization) in long format
kpi_combined <- data.frame(
  value = c(numeric_data$kpi_score, data_minmax$kpi_score),
  status = c(
    rep("Before Normalization", nrow(numeric_data)),
    rep("After Min-Max", nrow(data_minmax))
  )
)

# Boxplot per status. Each status gets its own panel with free scales: the
# normalised scores lie in [0, 1], so on a y-axis shared with the raw scores
# their box would be squashed flat.
ggplot(kpi_combined, aes(x = status, y = value, fill = status)) +
  geom_boxplot(width = 0.5, alpha = 0.7, outlier.color = "red") +
  geom_jitter(width = 0.15, alpha = 0.3, size = 1.5) +
  facet_wrap(~status, scales = "free") +
  labs(
    title = "KPI Score Distribution Before and After Normalization",
    subtitle = "Min-Max normalization rescales data to 0–1 range",
    x = "Data Status",
    y = "KPI Score"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(size = 11)
  ) +
  scale_fill_manual(values = c(
    "Before Normalization" = "#FFCC80",
    "After Min-Max" = "#A5D6A7"
  ))

7 Task: Mini Project - Dashboard
This mini project generates a large-scale dataset consisting of 6 companies with 50 employees each. The dataset includes employee ID, company ID, salary, performance score, KPI score, and department. Employees are categorized into KPI tiers: Platinum, Gold, Silver, and Bronze. The analysis includes per-company summaries, department-level comparisons, and advanced visualizations such as grouped bar charts, scatter plots with regression lines, and salary distribution histograms.
library(dplyr)
library(ggplot2)
# Generate an employee dataset for several companies (dashboard version,
# with the KPI rule written inline).
#
# Args:
#   num_companies: Number of companies to generate.
#   num_employees: Number of employees generated per company.
#
# Returns:
#   A data frame with one row per employee and the columns company_id,
#   employee_id, salary, department, performance_score, kpi_score and
#   top_performer ("Yes" when kpi_score > 90, otherwise "No").
#   Zero companies or zero employees give a zero-row data frame.
#
# Note: set.seed(123) is called on every run, which resets the global RNG.
generate_company_data <- function(num_companies, num_employees) {
  set.seed(123)
  departments <- c("Finance", "Marketing", "Operations", "HR", "Technology")

  # Preallocate one slot per employee instead of growing a data frame
  rows <- vector("list", num_companies * num_employees)
  slot <- 0L

  # seq_len() is empty for 0, whereas 1:0 would iterate over 1 and 0
  for (company in seq_len(num_companies)) {
    for (employee in seq_len(num_employees)) {
      # The random draws stay in this order (performance, KPI, salary,
      # department) so the seeded dataset is reproduced exactly
      performance_score <- round(runif(1, 50, 100), 1)

      # KPI calculation: bonus for high scores, penalty for low ones
      if (performance_score >= 85) {
        kpi_score <- performance_score + runif(1, 5, 15)
      } else if (performance_score >= 70) {
        kpi_score <- performance_score + runif(1, 0, 10)
      } else {
        kpi_score <- performance_score - runif(1, 0, 5)
      }
      # Round to 1 decimal and cap at 100
      kpi_score <- min(round(kpi_score, 1), 100)

      salary <- round(runif(1, 4000000, 20000000), 0)
      department <- sample(departments, 1)
      top_performer <- if (kpi_score > 90) "Yes" else "No"

      slot <- slot + 1L
      rows[[slot]] <- data.frame(
        company_id = paste0("Company-", company),
        employee_id = paste0("E", company, "-", employee),
        salary = salary,
        department = department,
        performance_score = performance_score,
        kpi_score = kpi_score,
        top_performer = top_performer
      )
    }
  }

  # Nothing generated: return the empty table with the usual columns
  if (length(rows) == 0) {
    return(data.frame(
      company_id = character(0),
      employee_id = character(0),
      salary = numeric(0),
      department = character(0),
      performance_score = numeric(0),
      kpi_score = numeric(0),
      top_performer = character(0)
    ))
  }

  do.call(rbind, rows)
}
# Dashboard dataset: 6 companies with 50 employees each
data_dashboard <- generate_company_data(6, 50)

# Store company_id as a factor with an explicit Company-1..Company-6 order
data_dashboard$company_id <- factor(
  data_dashboard$company_id,
  levels = paste0("Company-", 1:6)
)

# Add salary and KPI tiers; case_when() keeps the first matching condition,
# so thresholds are listed from highest to lowest
data_dashboard <- data_dashboard %>%
  mutate(
    salary_category = case_when(
      salary >= 15000000 ~ "High",
      salary >= 9000000 ~ "Medium",
      TRUE ~ "Low"
    ),
    kpi_level = case_when(
      kpi_score >= 90 ~ "Platinum",
      kpi_score >= 80 ~ "Gold",
      kpi_score >= 70 ~ "Silver",
      TRUE ~ "Bronze"
    )
  )

# Per-company averages (rounded) and number of top performers
dashboard_summary <- data_dashboard %>%
  group_by(company_id) %>%
  summarise(
    avg_salary = round(mean(salary), 0),
    avg_kpi = round(mean(kpi_score), 1),
    avg_performance = round(mean(performance_score), 1),
    total_top = sum(top_performer == "Yes"),
    .groups = "drop"
  )
print(dashboard_summary)
## # A tibble: 6 × 5
## company_id avg_salary avg_kpi avg_performance total_top
## <fct> <dbl> <dbl> <dbl> <int>
## 1 Company-1 11985826 72.4 71.1 9
## 2 Company-2 11127014 76.9 74.8 18
## 3 Company-3 12609185 78.2 76 17
## 4 Company-4 11493841 73.7 71.4 12
## 5 Company-5 12365574 79.5 76.7 19
## 6 Company-6 11371336 73.3 71.7 12
7.1 Chart 1 : Top Performers per Company
The number of top performers varies across companies, indicating differences in employee performance quality. Some companies have significantly more high-performing employees, which suggests stronger performance management or talent quality.
# Chart 1: number of top performers per company
ggplot(
  dashboard_summary,
  aes(
    x = factor(company_id, levels = paste0("Company-", 1:6)),
    y = total_top,
    fill = company_id
  )
) +
  # Bars
  geom_col(width = 0.6) +
  # Count label above each bar
  geom_text(
    aes(label = total_top),
    vjust = -0.4, size = 4, fontface = "bold"
  ) +
  # Extra headroom on the y-axis so the labels are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Top Performers per Company",
    subtitle = "Employees with KPI Score Above 90",
    x = "Company",
    y = "Number of Top Performers"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title = element_text(face = "bold"),
    panel.grid.major.x = element_blank()
  ) +
  scale_fill_brewer(palette = "Set2")

7.2 Chart 2 : Average KPI by Department & Company
KPI scores differ not only between companies but also across departments. Certain departments consistently show higher KPI averages, suggesting that performance may depend on the nature of the department and its operational focus.
# Average KPI (1 decimal) for every company/department combination
department_summary <- data_dashboard %>%
  group_by(company_id, department) %>%
  summarise(avg_kpi = round(mean(kpi_score), 1), .groups = "drop")

# Chart 2: grouped bars, departments on the x-axis, one bar per company
ggplot(
  department_summary,
  aes(
    x = factor(
      department,
      levels = c("Finance", "Marketing", "Operations", "HR", "Technology")
    ),
    y = avg_kpi,
    fill = company_id
  )
) +
  # Side-by-side bars within each department
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Average KPI by Department and Company",
    subtitle = "Comparison across departments",
    x = "Department",
    y = "Average KPI",
    fill = "Company"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 25, hjust = 1),
    axis.title = element_text(face = "bold"),
    panel.grid.major.x = element_blank()
  ) +
  scale_fill_brewer(palette = "Set2")

7.3 Chart 3 : Performance vs KPI Relationship
There is a clear positive relationship between performance scores and KPI scores. Employees with higher performance scores tend to achieve higher KPI values, confirming that KPI evaluation aligns well with performance metrics.
# Chart 3: performance score against KPI score, coloured by company
ggplot(
  data_dashboard,
  aes(x = performance_score, y = kpi_score, color = company_id)
) +
  # Data points
  geom_point(alpha = 0.6, size = 2) +
  # Linear regression line per company, without a confidence band
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    linewidth = 1
  ) +
  # Dashed reference lines at KPI = 80 and performance = 75
  geom_hline(yintercept = 80, linetype = "dashed", color = "gray60") +
  geom_vline(xintercept = 75, linetype = "dashed", color = "gray60") +
  labs(
    title = "Performance Score vs KPI Score",
    subtitle = "Relationship between performance and KPI across companies",
    x = "Performance Score",
    y = "KPI Score",
    color = "Company"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(size = 11)
  ) +
  scale_color_brewer(palette = "Dark2")

7.4 Chart 4 : Salary Distribution per Company
Salary distributions vary across companies, but most follow a relatively similar spread. This indicates that while salary ranges are comparable, there may still be slight differences in compensation strategies between companies.
# Chart 4: salary histogram, one panel per company (3 panels per row)
ggplot(data_dashboard, aes(x = salary, fill = company_id)) +
  geom_histogram(bins = 10, color = "white", alpha = 0.8) +
  facet_wrap(~company_id, ncol = 3) +
  labs(
    title = "Salary Distribution per Company",
    subtitle = "Histogram of employee salaries across companies",
    x = "Salary (IDR)",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Set2")

7.5 Chart 5 : KPI Level Distribution
Most employees fall into the Gold and Silver categories, while fewer reach the Platinum level. This suggests that achieving top-tier performance is relatively rare, and most employees perform at a moderate to high level.
# Employees per company and KPI level, with explicit factor orders for the
# stacking order (Platinum..Bronze) and the x-axis (Company-1..Company-6)
kpi_level_summary <- data_dashboard %>%
  count(company_id, kpi_level) %>%
  mutate(
    kpi_level = factor(
      kpi_level,
      levels = c("Platinum", "Gold", "Silver", "Bronze")
    ),
    company_id = factor(company_id, levels = paste0("Company-", 1:6))
  )

# Chart 5: stacked bars, one per company, segmented by KPI level
ggplot(kpi_level_summary, aes(x = company_id, y = n, fill = kpi_level)) +
  # Bars
  geom_col(width = 0.6) +
  # Count label centred in each segment
  geom_text(
    aes(label = n),
    position = position_stack(vjust = 0.5),
    size = 3.5, color = "white"
  ) +
  labs(
    title = "KPI Level Distribution per Company",
    subtitle = "Employee classification based on KPI score",
    x = "Company",
    y = "Number of Employees",
    fill = "KPI Level"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(size = 11)
  ) +
  scale_fill_manual(values = c(
    "Platinum" = "#9C27B0",
    "Gold" = "#FFC107",
    "Silver" = "#9E9E9E",
    "Bronze" = "#795548"
  ))

8 Automated Report Generation
This task focuses on building an automated reporting system using R. The goal is to generate reports for each company that include summary statistics and visualizations. By using functions and loops, the process becomes faster, more consistent, and reduces manual errors.
library(dplyr)
library(ggplot2)
# =====================================================
# 1. REPORT FUNCTION
# =====================================================
# Print a report for one company: a heading, a summary table and a salary
# histogram.
#
# Args:
#   data: Data frame with the columns company_id, salary,
#         performance_score and kpi_score.
#   comp: ID of the company to report on.
#
# Returns:
#   NULL. When no row of `data` matches `comp`, returns immediately
#   without printing anything.
generate_company_report <- function(data, comp) {
  # Rows belonging to the requested company
  rows_for_company <- data %>%
    filter(company_id == comp)

  # Nothing to report for an unknown company
  if (nrow(rows_for_company) == 0) {
    return(NULL)
  }

  cat("\n\n## REPORT FOR:", comp, "\n")
  cat("-------------------------------------\n")

  # ------------------------------
  # SUMMARY STATISTICS
  # ------------------------------
  # na.rm = TRUE keeps missing values from turning the averages into NA
  summary_table <- rows_for_company %>%
    summarise(
      avg_salary = round(mean(salary, na.rm = TRUE), 0),
      avg_performance = round(mean(performance_score, na.rm = TRUE), 2),
      avg_kpi = round(mean(kpi_score, na.rm = TRUE), 2),
      total_employees = n()
    )
  print(knitr::kable(summary_table))

  # ------------------------------
  # PLOT: SALARY DISTRIBUTION
  # ------------------------------
  plot_salary <- ggplot(rows_for_company, aes(x = salary)) +
    geom_histogram(bins = 10, fill = "#A5D6A7", color = "white") +
    labs(
      title = paste("Salary Distribution -", comp),
      x = "Salary",
      y = "Frequency"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5) # Center title
    )
  # Explicit print(): plots are not auto-printed inside a function
  print(plot_salary)
  cat("\n")
}
# =====================================================
# 2. LOOP EXECUTION (requires data_dashboard to exist)
# =====================================================
# Distinct company IDs, in order of first appearance
company_list <- unique(data_dashboard$company_id)

# One report per company
for (item in company_list) {
  generate_company_report(data_dashboard, item)
}
##
##
## ## REPORT FOR: Company-1
## -------------------------------------
##
##
## | avg_salary| avg_performance| avg_kpi| total_employees|
## |----------:|---------------:|-------:|---------------:|
## | 11985826| 71.08| 72.35| 50|
##
##
##
## ## REPORT FOR: Company-2
## -------------------------------------
##
##
## | avg_salary| avg_performance| avg_kpi| total_employees|
## |----------:|---------------:|-------:|---------------:|
## | 11127014| 74.77| 76.87| 50|
##
##
##
## ## REPORT FOR: Company-3
## -------------------------------------
##
##
## | avg_salary| avg_performance| avg_kpi| total_employees|
## |----------:|---------------:|-------:|---------------:|
## | 12609185| 76| 78.23| 50|
##
##
##
## ## REPORT FOR: Company-4
## -------------------------------------
##
##
## | avg_salary| avg_performance| avg_kpi| total_employees|
## |----------:|---------------:|-------:|---------------:|
## | 11493841| 71.44| 73.71| 50|
##
##
##
## ## REPORT FOR: Company-5
## -------------------------------------
##
##
## | avg_salary| avg_performance| avg_kpi| total_employees|
## |----------:|---------------:|-------:|---------------:|
## | 12365574| 76.66| 79.47| 50|
##
##
##
## ## REPORT FOR: Company-6
## -------------------------------------
##
##
## | avg_salary| avg_performance| avg_kpi| total_employees|
## |----------:|---------------:|-------:|---------------:|
## | 11371336| 71.67| 73.35| 50|