library(readxl)
library(tidyverse)
library(janitor)
library(scales)
library(corrplot)
library(randomForest)
library(caret)
library(patchwork)
library(gt)
library(lubridate)
library(broom)
theme_ceres <- theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14, color = "#1a1a2e"),
plot.subtitle = element_text(size = 10, color = "#64748b"),
plot.caption = element_text(size = 8, color = "#94a3b8"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "#e2e8f0"),
legend.position = "bottom",
strip.text = element_text(face = "bold")
)
theme_set(theme_ceres)
ceres_pal <- c("#f97316", "#3b82f6", "#10b981", "#f59e0b",
"#8b5cf6", "#ef4444", "#06b6d4", "#ec4899")
file_path <- "C:/Users/malgh/Downloads/Ceres ISA 405 COPY 2.xlsx"
techs <- read_excel(file_path, sheet = "Technicians") %>% clean_names()
sr <- read_excel(file_path, sheet = "Service_Requests") %>% clean_names()
costs <- read_excel(file_path, sheet = "Cost_Analysis") %>% clean_names()
skills <- read_excel(file_path, sheet = "Skills_Registry") %>% clean_names()
tibble(
Sheet = c("Technicians", "Service Requests", "Cost Analysis", "Skills Registry"),
Rows = c(nrow(techs), nrow(sr), nrow(costs), nrow(skills)),
Cols = c(ncol(techs), ncol(sr), ncol(costs), ncol(skills))
) %>%
gt() %>%
tab_header(title = md("**Dataset Overview**"))
| Dataset Overview | ||
| Sheet | Rows | Cols |
|---|---|---|
| Technicians | 50 | 25 |
| Service Requests | 200 | 22 |
| Cost Analysis | 149 | 16 |
| Skills Registry | 150 | 10 |
summary(sr %>% select(days_to_assign, days_to_resolve, repair_cost_usd, customer_sat_score))
## days_to_assign days_to_resolve repair_cost_usd customer_sat_score
## Min. : 0.000 Min. : 1.0 Min. : 630 Min. :1.500
## 1st Qu.: 3.000 1st Qu.: 7.0 1st Qu.: 4056 1st Qu.:2.375
## Median : 8.000 Median :15.0 Median : 7570 Median :3.200
## Mean : 7.186 Mean :14.7 Mean : 7919 Mean :3.220
## 3rd Qu.:11.000 3rd Qu.:23.0 3rd Qu.:11714 3rd Qu.:4.125
## Max. :14.000 Max. :30.0 Max. :14913 Max. :5.000
## NA's :44 NA's :96 NA's :96 NA's :96
status_summary <- sr %>%
count(status) %>%
mutate(pct = n / sum(n))
ggplot(status_summary, aes(x = reorder(status, n), y = n, fill = status)) +
geom_col(width = 0.7, show.legend = FALSE) +
geom_text(aes(label = paste0(n, " (", percent(pct, 1), ")")),
hjust = -0.1, size = 3.5, fontface = "bold") +
coord_flip(ylim = c(0, max(status_summary$n) * 1.2)) +
scale_fill_manual(values = c("Resolved" = "#10b981", "Pending" = "#f59e0b",
"Escalated" = "#ef4444", "In Progress" = "#3b82f6")) +
labs(title = "Service Request Status Distribution",
subtitle = paste0("n = ", nrow(sr), " total requests"),
x = NULL, y = "Count")
sla_data <- sr %>% filter(sla_breached_yn %in% c("Y", "N"))
sla_rate <- mean(sla_data$sla_breached_yn == "Y")
Overall SLA Breach Rate: 88.5% (138 of 156 assigned requests)
sla_by_priority <- sla_data %>%
group_by(priority) %>%
summarise(
total = n(),
breached = sum(sla_breached_yn == "Y"),
rate = breached / total,
.groups = "drop"
) %>%
mutate(priority = factor(priority, levels = c("Critical", "High", "Medium", "Low")))
ggplot(sla_by_priority, aes(x = priority, y = rate, fill = priority)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = percent(rate, 0.1)), vjust = -0.5, fontface = "bold", size = 4) +
scale_y_continuous(labels = percent, limits = c(0, 1.1)) +
scale_fill_manual(values = c("Critical" = "#ef4444", "High" = "#f97316",
"Medium" = "#f59e0b", "Low" = "#3b82f6")) +
labs(title = "SLA Breach Rate by Priority Level",
subtitle = "Breach rate is priority-agnostic. The system has no prioritization logic.",
x = NULL, y = "Breach Rate")
method_perf <- sr %>%
filter(!is.na(assignment_method)) %>%
group_by(assignment_method) %>%
summarise(
count = n(),
avg_days_assign = mean(days_to_assign, na.rm = TRUE),
avg_days_resolve = mean(days_to_resolve, na.rm = TRUE),
avg_csat = mean(customer_sat_score, na.rm = TRUE),
sla_breach_rate = mean(sla_breached_yn == "Y", na.rm = TRUE),
.groups = "drop"
)
p1 <- ggplot(method_perf, aes(x = reorder(assignment_method, avg_days_assign),
y = avg_days_assign, fill = assignment_method)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = round(avg_days_assign, 1)), hjust = -0.2, fontface = "bold") +
coord_flip(ylim = c(0, 10)) +
scale_fill_manual(values = ceres_pal) +
labs(title = "Avg Days to Assign by Method",
subtitle = "All methods cluster around 7 days. The process itself is broken.",
x = NULL, y = "Days")
p2 <- ggplot(method_perf, aes(x = reorder(assignment_method, avg_days_resolve),
y = avg_days_resolve, fill = assignment_method)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = round(avg_days_resolve, 1)), hjust = -0.2, fontface = "bold") +
coord_flip(ylim = c(0, 20)) +
scale_fill_manual(values = ceres_pal) +
labs(title = "Avg Days to Resolve by Method", x = NULL, y = "Days")
p1 / p2
monthly <- sr %>%
mutate(month = floor_date(date_submitted, "month")) %>%
group_by(month) %>%
summarise(
requests = n(),
resolved = sum(status == "Resolved"),
breaches = sum(sla_breached_yn == "Y", na.rm = TRUE),
.groups = "drop"
) %>%
pivot_longer(cols = c(requests, resolved, breaches),
names_to = "metric", values_to = "value")
ggplot(monthly, aes(x = month, y = value, color = metric)) +
geom_line(linewidth = 1.2) +
geom_point(size = 2.5) +
scale_color_manual(values = c("requests" = "#f97316", "resolved" = "#10b981",
"breaches" = "#ef4444"),
labels = c("SLA Breaches", "Total Requests", "Resolved")) +
scale_x_datetime(date_labels = "%b", date_breaks = "1 month") +
labs(title = "Monthly Service Request Trends (2024)",
subtitle = "SLA breaches consistently track total volume. No improvement over time.",
x = NULL, y = "Count", color = NULL)
issue_perf <- sr %>%
filter(!is.na(days_to_resolve)) %>%
group_by(issue_type) %>%
summarise(
count = n(),
avg_resolve = mean(days_to_resolve),
avg_cost = mean(repair_cost_usd, na.rm = TRUE),
.groups = "drop"
)
ggplot(issue_perf, aes(x = reorder(issue_type, avg_resolve),
y = avg_resolve, fill = avg_resolve)) +
geom_col(width = 0.65, show.legend = FALSE) +
geom_text(aes(label = paste0(round(avg_resolve, 1), "d")),
hjust = -0.15, size = 3.5, fontface = "bold") +
coord_flip(ylim = c(0, max(issue_perf$avg_resolve) * 1.15)) +
scale_fill_gradient(low = "#10b981", high = "#ef4444") +
labs(title = "Avg Resolution Time by Issue Type",
subtitle = "Data Transmission failures take 2.4x longer than Gateway Offline issues",
x = NULL, y = "Avg Days to Resolve")
cost_compare <- tibble(
category = c("Internal Cost", "Contractor Cost", "Revenue Loss", "Total Impact"),
value = c(mean(costs$internal_cost_usd, na.rm = TRUE),
mean(costs$contractor_cost_usd, na.rm = TRUE),
mean(costs$revenue_loss_estimate_usd, na.rm = TRUE),
mean(costs$total_impact_usd, na.rm = TRUE))
)
ggplot(cost_compare, aes(x = reorder(category, value), y = value, fill = category)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = dollar(value, accuracy = 1)), hjust = -0.1, fontface = "bold", size = 3.5) +
coord_flip(ylim = c(0, max(cost_compare$value) * 1.15)) +
scale_fill_manual(values = c("Internal Cost" = "#10b981", "Contractor Cost" = "#ef4444",
"Revenue Loss" = "#f59e0b", "Total Impact" = "#8b5cf6")) +
scale_y_continuous(labels = dollar) +
labs(title = "Average Cost per Service Request",
subtitle = "Contractors cost 3.8x more than internal technicians",
x = NULL, y = "USD")
Contractor Premium Total: $224,819
prev_react <- costs %>%
filter(!is.na(preventive_vs_reactive)) %>%
group_by(repair_type = preventive_vs_reactive) %>%
summarise(
count = n(),
avg_cost = mean(actual_cost_usd, na.rm = TRUE),
avg_downtime = mean(equipment_downtime_hours, na.rm = TRUE),
avg_rev_loss = mean(revenue_loss_estimate_usd, na.rm = TRUE),
total_impact = sum(total_impact_usd, na.rm = TRUE),
.groups = "drop"
) %>%
filter(!is.na(repair_type))
prev_react %>%
select(repair_type, avg_downtime, avg_rev_loss) %>%
pivot_longer(-repair_type, names_to = "metric", values_to = "value") %>%
mutate(metric = recode(metric,
"avg_downtime" = "Avg Downtime (hrs)",
"avg_rev_loss" = "Avg Revenue Loss ($)")) %>%
ggplot(aes(x = repair_type, y = value, fill = repair_type)) +
geom_col(width = 0.5, show.legend = FALSE) +
geom_text(aes(label = ifelse(value > 1000, dollar(value, accuracy = 1), round(value, 1))),
vjust = -0.3, fontface = "bold", size = 3.5) +
facet_wrap(~metric, scales = "free_y") +
scale_fill_manual(values = c("Preventive" = "#10b981", "Reactive" = "#ef4444")) +
labs(title = "Preventive vs Reactive Maintenance",
subtitle = "Reactive: 11x more downtime, 11x more revenue loss",
x = NULL, y = NULL)
techs %>%
count(workload_status) %>%
mutate(workload_status = factor(workload_status,
levels = c("Available", "Busy", "At Capacity", "Unavailable", "Overloaded"))) %>%
ggplot(aes(x = workload_status, y = n, fill = workload_status)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = n), vjust = -0.3, fontface = "bold") +
scale_fill_manual(values = c("Available" = "#10b981", "Busy" = "#f59e0b",
"At Capacity" = "#f97316", "Unavailable" = "#ef4444", "Overloaded" = "#8b5cf6")) +
labs(title = "Technician Workload Distribution",
subtitle = "Only 4 of 50 techs are 'Available', yet avg utilization is just 67%",
x = NULL, y = "Count")
dept_summary <- techs %>%
group_by(department) %>%
summarise(
n = n(),
avg_util = mean(monthly_utilization_pct),
avg_rate = mean(billable_rate_usd_day),
avg_resolve = mean(avg_resolution_days),
avg_csat = mean(customer_satisfaction_score),
.groups = "drop"
)
dept_summary %>%
pivot_longer(cols = c(avg_util, avg_rate, avg_csat),
names_to = "metric", values_to = "value") %>%
mutate(metric = recode(metric,
"avg_util" = "Utilization (%)",
"avg_rate" = "Bill Rate ($/day)",
"avg_csat" = "CSAT (out of 5)")) %>%
ggplot(aes(x = department, y = value, fill = department)) +
geom_col(width = 0.5, show.legend = FALSE) +
geom_text(aes(label = round(value, 1)), vjust = -0.3, size = 3.2, fontface = "bold") +
facet_wrap(~metric, scales = "free_y") +
scale_fill_manual(values = ceres_pal[1:3]) +
labs(title = "Department Performance Comparison", x = NULL, y = NULL)
quality_rate <- mean(!is.na(skills$data_quality_issue))
skills %>%
filter(!is.na(data_quality_issue)) %>%
count(data_quality_issue) %>%
ggplot(aes(x = reorder(data_quality_issue, n), y = n, fill = data_quality_issue)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = n), hjust = -0.2, fontface = "bold") +
coord_flip(ylim = c(0, 30)) +
scale_fill_manual(values = ceres_pal) +
labs(title = "Skills Registry: Data Quality Issues",
subtitle = paste0(sum(!is.na(skills$data_quality_issue)), " of ",
nrow(skills), " records (", percent(quality_rate, 0.1), ") have problems"),
x = NULL, y = "Count")
skills %>%
count(data_source) %>%
ggplot(aes(x = reorder(data_source, n), y = n, fill = data_source)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = n), hjust = -0.2, fontface = "bold") +
coord_flip(ylim = c(0, 50)) +
scale_fill_manual(values = ceres_pal) +
labs(title = "Skills Data Source Distribution",
subtitle = "No single source of truth. Data scattered across 5 systems.",
x = NULL, y = "Count")
cor_data <- sr %>%
select(days_to_assign, days_to_resolve, repair_cost_usd, customer_sat_score) %>%
drop_na()
cor_matrix <- cor(cor_data)
corrplot(cor_matrix, method = "color", type = "upper",
addCoef.col = "black", number.cex = 0.9, tl.col = "black",
title = "Correlation: Assignment, Resolution, Cost, Satisfaction",
mar = c(0, 0, 2, 0))
product_summary <- sr %>%
group_by(product) %>%
summarise(
requests = n(),
avg_cost = mean(repair_cost_usd, na.rm = TRUE),
avg_resolve = mean(days_to_resolve, na.rm = TRUE),
avg_csat = mean(customer_sat_score, na.rm = TRUE),
breach_rate = mean(sla_breached_yn == "Y", na.rm = TRUE),
.groups = "drop"
)
ggplot(product_summary, aes(x = reorder(product, avg_cost), y = avg_cost, fill = avg_cost)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = dollar(avg_cost, accuracy = 1)), hjust = -0.1, size = 3.2, fontface = "bold") +
coord_flip(ylim = c(0, max(product_summary$avg_cost) * 1.15)) +
scale_fill_gradient(low = "#10b981", high = "#ef4444") +
labs(title = "Average Repair Cost by Product",
subtitle = "CeresGateway X100 and IoT Edge Node are the costliest to service",
x = NULL, y = "Avg Repair Cost (USD)")
model_data_sla <- sr %>%
filter(sla_breached_yn %in% c("Y", "N"), !is.na(days_to_assign)) %>%
mutate(
sla_breach = ifelse(sla_breached_yn == "Y", 1, 0),
priority = factor(priority, levels = c("Low", "Medium", "High", "Critical")),
channel = factor(channel),
issue_type = factor(issue_type)
)
sla_model <- glm(sla_breach ~ days_to_assign + priority + channel + issue_type,
data = model_data_sla, family = binomial)
summary(sla_model)
##
## Call:
## glm(formula = sla_breach ~ days_to_assign + priority + channel +
## issue_type, family = binomial, data = model_data_sla)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.957440 1.622302 -0.590 0.55507
## days_to_assign 1.173036 0.363841 3.224 0.00126 **
## priorityMedium -0.544372 1.016620 -0.535 0.59232
## priorityHigh -1.218744 1.173837 -1.038 0.29915
## priorityCritical -0.586973 1.163122 -0.505 0.61380
## channelEmail -1.716175 1.123460 -1.528 0.12662
## channelField Report -1.837546 1.086308 -1.692 0.09073 .
## channelPhone -1.600604 1.306261 -1.225 0.22045
## issue_typeConnectivity Loss 2.133186 1.719986 1.240 0.21489
## issue_typeData Transmission Failure 2.180300 1.701185 1.282 0.19997
## issue_typeFirmware Incompatibility -0.005494 1.372591 -0.004 0.99681
## issue_typeGateway Offline 1.312189 2.000218 0.656 0.51181
## issue_typePower Supply Failure 0.956777 1.453583 0.658 0.51040
## issue_typeSensor Malfunction 2.082403 1.590455 1.309 0.19043
## issue_typeSoftware Crash 1.950669 1.424177 1.370 0.17079
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 111.580 on 155 degrees of freedom
## Residual deviance: 53.782 on 141 degrees of freedom
## AIC: 83.782
##
## Number of Fisher Scoring iterations: 9
sla_tidy <- tidy(sla_model, conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(odds_ratio = exp(estimate))
ggplot(sla_tidy, aes(x = reorder(term, estimate), y = estimate)) +
geom_point(size = 3, color = "#f97316") +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2, color = "#64748b") +
geom_hline(yintercept = 0, linetype = "dashed", color = "#ef4444") +
coord_flip() +
labs(title = "SLA Breach: Logistic Regression Coefficients",
subtitle = "Positive = increases breach probability | Bars = 95% CI",
x = NULL, y = "Log-Odds Coefficient")
model_data_resolve <- sr %>%
filter(!is.na(days_to_resolve), !is.na(days_to_assign)) %>%
mutate(
priority = factor(priority, levels = c("Low", "Medium", "High", "Critical")),
issue_type = factor(issue_type),
assignment_method = factor(assignment_method),
contractor = ifelse(contractor_used_yn == "Y", 1, 0)
)
resolve_model <- lm(days_to_resolve ~ days_to_assign + priority + issue_type +
assignment_method + contractor,
data = model_data_resolve)
summary(resolve_model)
##
## Call:
## lm(formula = days_to_resolve ~ days_to_assign + priority + issue_type +
## assignment_method + contractor, data = model_data_resolve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.5080 -6.3024 0.1616 5.5019 15.4098
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.6704 3.5118 4.177 6.87e-05 ***
## days_to_assign -0.2955 0.2127 -1.389 0.16816
## priorityMedium 0.2876 2.3526 0.122 0.90298
## priorityHigh 3.4389 2.5106 1.370 0.17422
## priorityCritical -1.4768 2.3442 -0.630 0.53033
## issue_typeConnectivity Loss 2.5275 3.9120 0.646 0.51989
## issue_typeData Transmission Failure 8.6783 3.2770 2.648 0.00957 **
## issue_typeFirmware Incompatibility 0.2314 3.6906 0.063 0.95014
## issue_typeGateway Offline -4.0524 3.1177 -1.300 0.19702
## issue_typePower Supply Failure -1.4189 3.0579 -0.464 0.64376
## issue_typeSensor Malfunction -0.3259 3.1870 -0.102 0.91878
## issue_typeSoftware Crash 5.2767 2.9656 1.779 0.07860 .
## assignment_methodManual-HR Excel 0.0251 2.3777 0.011 0.99160
## assignment_methodManual-Phone Call 0.5290 2.5375 0.208 0.83535
## assignment_methodSystem Matched 1.0560 2.3842 0.443 0.65889
## contractor NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.274 on 89 degrees of freedom
## Multiple R-squared: 0.2508, Adjusted R-squared: 0.1329
## F-statistic: 2.128 on 14 and 89 DF, p-value: 0.01726
resolve_tidy <- tidy(resolve_model, conf.int = TRUE) %>%
filter(term != "(Intercept)")
ggplot(resolve_tidy, aes(x = reorder(term, estimate), y = estimate)) +
geom_point(size = 3, color = "#3b82f6") +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2, color = "#64748b") +
geom_hline(yintercept = 0, linetype = "dashed", color = "#ef4444") +
coord_flip() +
labs(title = "Resolution Time: Linear Regression Coefficients",
subtitle = "Positive = adds days to resolution | Bars = 95% CI",
x = NULL, y = "Effect on Days to Resolve")
par(mfrow = c(2, 2))
plot(resolve_model, which = 1:4)
par(mfrow = c(1, 1))
model_data_cost <- costs %>%
filter(!is.na(total_impact_usd), !is.na(preventive_vs_reactive)) %>%
mutate(
is_reactive = ifelse(preventive_vs_reactive == "Reactive", 1, 0),
emp_type = factor(employment_type)
)
cost_model <- lm(total_impact_usd ~ daily_rate_usd + days_on_site + is_reactive + emp_type,
data = model_data_cost)
summary(cost_model)
##
## Call:
## lm(formula = total_impact_usd ~ daily_rate_usd + days_on_site +
## is_reactive + emp_type, data = model_data_cost)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43583 -7829 -874 7142 45198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -104.73 8011.48 -0.013 0.990
## daily_rate_usd 16.20 11.56 1.402 0.165
## days_on_site 98.21 716.32 0.137 0.891
## is_reactive 40385.10 4184.31 9.652 1.49e-14 ***
## emp_typeFull-Time -99.37 5295.11 -0.019 0.985
## emp_typePart-Time 4289.41 4987.43 0.860 0.393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18090 on 71 degrees of freedom
## Multiple R-squared: 0.5736, Adjusted R-squared: 0.5435
## F-statistic: 19.1 on 5 and 71 DF, p-value: 5.434e-12
Key finding: Each additional day on-site adds $98 to total impact. Reactive repairs add $40,385 to total impact vs preventive.
model_data_csat <- sr %>%
filter(!is.na(customer_sat_score), !is.na(days_to_assign), !is.na(days_to_resolve)) %>%
mutate(
priority = factor(priority),
issue_type = factor(issue_type),
contractor = factor(contractor_used_yn),
sla_breached = factor(sla_breached_yn)
) %>%
select(customer_sat_score, days_to_assign, days_to_resolve,
repair_cost_usd, priority, issue_type, contractor, sla_breached)
set.seed(42)
train_idx <- createDataPartition(model_data_csat$customer_sat_score, p = 0.7, list = FALSE)
train_data <- model_data_csat[train_idx, ]
test_data <- model_data_csat[-train_idx, ]
rf_model <- randomForest(customer_sat_score ~ ., data = train_data,
ntree = 500, importance = TRUE)
print(rf_model)
##
## Call:
## randomForest(formula = customer_sat_score ~ ., data = train_data, ntree = 500, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 2
##
## Mean of squared residuals: 1.377069
## % Var explained: -23.15
importance_df <- as.data.frame(importance(rf_model)) %>%
rownames_to_column("variable") %>%
arrange(desc(`%IncMSE`))
ggplot(importance_df, aes(x = reorder(variable, `%IncMSE`), y = `%IncMSE`)) +
geom_col(fill = "#8b5cf6", width = 0.6) +
geom_text(aes(label = round(`%IncMSE`, 1)), hjust = -0.2, fontface = "bold", size = 3.5) +
coord_flip(ylim = c(0, max(importance_df$`%IncMSE`) * 1.15)) +
labs(title = "Random Forest: Feature Importance for CSAT Prediction",
subtitle = "Which factors most influence customer satisfaction?",
x = NULL, y = "% Increase in MSE When Removed")
predictions <- predict(rf_model, test_data)
rf_rmse <- sqrt(mean((predictions - test_data$customer_sat_score)^2))
rf_r2 <- cor(predictions, test_data$customer_sat_score)^2
Random Forest Test Performance: RMSE = 1.013 | R-squared = 0.02
tibble(actual = test_data$customer_sat_score, predicted = predictions) %>%
ggplot(aes(x = actual, y = predicted)) +
geom_point(alpha = 0.6, color = "#8b5cf6", size = 2.5) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "#ef4444") +
labs(title = "Predicted vs Actual Customer Satisfaction",
subtitle = paste0("Random Forest | RMSE = ", round(rf_rmse, 3),
" | R-squared = ", round(rf_r2, 3)),
x = "Actual CSAT", y = "Predicted CSAT")
tibble(
Metric = c(
"SLA Breach Rate",
"Avg Days to Assign",
"Contractor vs Internal Cost",
"Reactive Downtime Multiplier",
"Workforce Visibility Gap",
"Skills Data Quality",
"Total Financial Impact",
"Projected Annual Savings"
),
Current = c(
"88.5%", "7.2 days", "$10,267 vs $2,689", "11x (38.4h vs 3.5h)",
"8% visible / 67% actual util.", "63.3% records flawed", "$2.08M", "N/A"
),
Target = c(
"<15%", "<=2 days", "80% internal dispatch", "<2x with predictive maint.",
"Real-time availability", "<5% error rate", "<$800K", "~$588K/year"
),
`What This Proves` = c(
"No prioritization logic exists. System treats all tickets equally.",
"Bottleneck is data access, not the search method.",
"3.8x cost premium justifies building an internal matching system.",
"Shifting 30% reactive to preventive saves ~$363K in revenue losses.",
"33% of capacity is idle but invisible to the after-sales team.",
"Data standardization must happen before any dispatch automation.",
"Scale of the problem justifies IS investment.",
"ROI of 194-292% on $150-200K implementation."
)
) %>%
gt() %>%
tab_header(
title = md("**Ceres Business Analytics: Executive Summary**"),
subtitle = "Key metrics proving the need for a privacy-compliant dispatch system"
) %>%
cols_width(Metric ~ px(155), Current ~ px(145),
Target ~ px(140), `What This Proves` ~ px(330)) %>%
tab_style(style = cell_text(weight = "bold"), locations = cells_column_labels()) %>%
tab_style(style = cell_fill(color = "#fef3cd"), locations = cells_body(columns = `What This Proves`)) %>%
tab_style(style = cell_fill(color = "#fee2e2"), locations = cells_body(columns = Current)) %>%
tab_style(style = cell_fill(color = "#dcfce7"), locations = cells_body(columns = Target))
| Ceres Business Analytics: Executive Summary | |||
| Key metrics proving the need for a privacy-compliant dispatch system | |||
| Metric | Current | Target | What This Proves |
|---|---|---|---|
| SLA Breach Rate | 88.5% | <15% | No prioritization logic exists. System treats all tickets equally. |
| Avg Days to Assign | 7.2 days | <=2 days | Bottleneck is data access, not the search method. |
| Contractor vs Internal Cost | $10,267 vs $2,689 | 80% internal dispatch | 3.8x cost premium justifies building an internal matching system. |
| Reactive Downtime Multiplier | 11x (38.4h vs 3.5h) | <2x with predictive maint. | Shifting 30% reactive to preventive saves ~$363K in revenue losses. |
| Workforce Visibility Gap | 8% visible / 67% actual util. | Real-time availability | 33% of capacity is idle but invisible to the after-sales team. |
| Skills Data Quality | 63.3% records flawed | <5% error rate | Data standardization must happen before any dispatch automation. |
| Total Financial Impact | $2.08M | <$800K | Scale of the problem justifies IS investment. |
| Projected Annual Savings | N/A | ~$588K/year | ROI of 194-292% on $150-200K implementation. |