3 numbers every stakeholder needs before reading further.
| Metric | Value | What it means |
|---|---|---|
| Overall dissatisfaction rate | 16.5% of reviewed orders score ≤ 2 | 1 in ~6 customers is unhappy |
| On-time delivery, still dissatisfied | 11.7% of orders delivered early/on-time still score ≤ 2 | Delivery is not the dominant root cause |
| Return/refund complaints | Highest dissatisfaction intensity at ~79% | Process pain amplifies product defect impact |
Analytical flow: Category Risk → Root-Cause Split (delivery vs product) → Text Evidence → Priority Matrix → Recommendations → Clustering.
base_tbl <- order_items %>%
left_join(product_lookup, by = "product_id") %>%
left_join(orders_clean, by = "order_id") %>%
left_join(reviews_clean, by = "order_id") %>%
mutate(category_en = replace_na(category_en, "unknown"))Grain: one row = one order-item. Reviews are aggregated at order level then joined.
negative_themes_vec <- c(
"Product defect/low quality", "Wrong item/mismatch",
"Packaging issue", "Delivery delay/not arrived",
"Seller/service issue", "Return/refund process",
"Price/freight concern"
)
classify_theme <- function(msg) {
case_when(
str_detect(msg, "quebrad|amassad|defeit|estrag|baixa qualidade|pessima qualidade|nao funciona|ruim") ~ "Product defect/low quality",
str_detect(msg, "diferente|nao corresponde|nao e o que pedi|errad|cor errada|tamanho errado") ~ "Wrong item/mismatch",
str_detect(msg, "embalag|caixa|pacote|mal embalado|avariado") ~ "Packaging issue",
str_detect(msg, "atras|demor|prazo|entrega|tarde|nao chegou|nao recebi") ~ "Delivery delay/not arrived",
str_detect(msg, "atendimento|suporte|resposta|vendedor|sac") ~ "Seller/service issue",
str_detect(msg, "cancel|estorno|reembolso|devolu|troca") ~ "Return/refund process",
str_detect(msg, "preco|caro|valor|frete") ~ "Price/freight concern",
TRUE ~ "Other/unclear"
)
}
comment_tbl <- base_tbl %>%
filter(!is.na(review_comment_message), category_en != "unknown") %>%
mutate(
msg = review_comment_message %>%
iconv(from = "", to = "ASCII//TRANSLIT") %>%
str_to_lower() %>%
str_replace_all("[^a-z0-9 ]", " ") %>%
str_squish(),
theme = classify_theme(msg)
)
mismatch_tbl <- comment_tbl %>%
mutate(
negative_theme = theme %in% negative_themes_vec,
high_score_negative_text = if_else(review_band == "High (4-5)" & negative_theme, 1L, 0L),
low_score_nonnegative_text = if_else(review_band == "Low (1-2)" & !negative_theme, 1L, 0L)
) %>%
summarise(
`Total comments with text` = n(),
`High score + negative text (rate)` = percent(mean(high_score_negative_text), 0.1),
`Low score + non-negative text (rate)` = percent(mean(low_score_nonnegative_text), 0.1)
)
kable(mismatch_tbl, align = "lcc") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Total comments with text | High score + negative text (rate) | Low score + non-negative text (rate) |
|---|---|---|
| 47672 | 26.8% | 15.1% |
Implication: ~27% of happy-scored orders contain complaint language, and ~15% of low-scored orders have non-negative text. We must use both numeric scores and text themes together.
MIN_ORDERS <- 100
category_core <- base_tbl %>%
filter(!is.na(review_score), category_en != "unknown") %>%
group_by(category_en) %>%
summarise(
reviewed_orders = n_distinct(order_id),
avg_review = mean(review_score, na.rm = TRUE),
dissatisfaction_rate = mean(dissatisfied, na.rm = TRUE),
delay_rate = mean(delayed, na.rm = TRUE),
.groups = "drop"
) %>%
filter(reviewed_orders >= MIN_ORDERS) %>%
mutate(
z_dissat = as.numeric(scale(dissatisfaction_rate)),
z_vol = as.numeric(scale(log1p(reviewed_orders))),
risk_score = 0.60 * z_dissat + 0.40 * z_vol
) %>%
arrange(desc(risk_score))
top15 <- category_core %>% slice_head(n = 15)
ggplot(top15,
aes(x = reorder(category_en, dissatisfaction_rate),
y = dissatisfaction_rate,
fill = reviewed_orders)) +
geom_col(width = 0.7) +
geom_text(aes(label = percent(dissatisfaction_rate, 0.1)),
hjust = -0.15, size = 3.2, colour = "grey30") +
coord_flip() +
scale_y_continuous(labels = percent_format(0.1),
expand = expansion(mult = c(0, 0.12))) +
scale_fill_gradient(low = CLR_ORANGE, high = CLR_RED, labels = comma_format()) +
labs(
title = "Top 15 Categories by Dissatisfaction Rate",
subtitle = paste0("Categories with >= ", MIN_ORDERS, " reviewed orders | Colour = order volume"),
x = NULL, y = "Dissatisfaction Rate (score <= 2)",
fill = "Reviewed orders",
caption = "Risk score = 60% dissatisfaction rate + 40% log(volume)"
) +
theme_olist()top15 %>%
transmute(
Category = category_en,
`Reviewed Orders` = comma(reviewed_orders),
`Avg Score` = round(avg_review, 2),
`Dissatisfaction %` = percent(dissatisfaction_rate, 0.1),
`Delay Rate %` = percent(delay_rate, 0.1),
`Risk Score` = round(risk_score, 2)
) %>%
kable(align = "lrrrrr") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(1:3, bold = TRUE, background = "#fff3cd")| Category | Reviewed Orders | Avg Score | Dissatisfaction % | Delay Rate % | Risk Score |
|---|---|---|---|---|---|
| office_furniture | 1,273 | 3.48 | 26.3% | 8.8% | 1.66 |
| fashion_male_clothing | 112 | 3.62 | 28.8% | 5.3% | 1.33 |
| bed_bath_table | 9,417 | 3.87 | 19.4% | 8.3% | 1.19 |
| furniture_decor | 6,449 | 3.89 | 19.7% | 8.3% | 1.13 |
| fixed_telephony | 217 | 3.67 | 26.1% | 4.2% | 1.12 |
| computers_accessories | 6,689 | 3.92 | 18.8% | 7.6% | 1.00 |
| watches_gifts | 5,624 | 4.00 | 16.7% | 8.1% | 0.63 |
| audio | 350 | 3.81 | 22.0% | 12.6% | 0.63 |
| telephony | 4,199 | 3.93 | 17.2% | 8.1% | 0.63 |
| housewares | 5,884 | 4.04 | 16.0% | 6.3% | 0.55 |
| baby | 2,885 | 3.99 | 17.2% | 8.5% | 0.52 |
| sports_leisure | 7,720 | 4.09 | 15.0% | 7.2% | 0.47 |
| garden_tools | 3,518 | 4.03 | 16.3% | 7.8% | 0.44 |
| auto | 3,897 | 4.04 | 15.9% | 8.1% | 0.42 |
| home_confort | 397 | 3.83 | 20.3% | 10.1% | 0.41 |
top12_cats <- category_core %>% slice_head(n = 12) %>% pull(category_en)
delay_split <- base_tbl %>%
filter(!is.na(review_score), category_en %in% top12_cats, !is.na(delay_days)) %>%
mutate(
delivery_segment = case_when(
delay_days <= 0 ~ "On-time / Early",
delay_days <= 3 ~ "Late 1-3 days",
delay_days <= 7 ~ "Late 4-7 days",
TRUE ~ "Late > 7 days"
),
delivery_segment = factor(delivery_segment,
levels = c("On-time / Early", "Late 1-3 days", "Late 4-7 days", "Late > 7 days"))
) %>%
group_by(category_en, delivery_segment) %>%
summarise(
n_orders = n_distinct(order_id),
dissatisfaction_rate = mean(dissatisfied, na.rm = TRUE),
.groups = "drop"
) %>%
filter(n_orders >= 30)
ggplot(delay_split,
aes(x = delivery_segment,
y = reorder(category_en, dissatisfaction_rate),
fill = dissatisfaction_rate)) +
geom_tile(colour = "white", linewidth = 0.8) +
geom_text(aes(label = if_else(n_orders >= 30, percent(dissatisfaction_rate, 1), "")),
size = 3, colour = "white", fontface = "bold") +
scale_fill_gradient2(low = CLR_GREEN, mid = CLR_ORANGE, high = CLR_RED,
midpoint = 0.20, labels = percent_format(1)) +
labs(
title = "Dissatisfaction Rate by Delivery Timing",
subtitle = "Cells with < 30 orders hidden | % shown in white = dissatisfaction rate",
x = "Delivery Segment", y = NULL, fill = "Dissatisfaction",
caption = "Key: High dissatisfaction in 'On-time/Early' column -> product issue, not logistics"
) +
theme_olist() +
theme(axis.text.x = element_text(angle = 20, hjust = 1))verdict_tbl <- base_tbl %>%
filter(!is.na(dissatisfied), category_en %in% top12_cats, !is.na(delay_days)) %>%
mutate(ontime = delay_days <= 0) %>%
group_by(category_en, ontime) %>%
summarise(dissat = mean(dissatisfied), n = n_distinct(order_id), .groups = "drop") %>%
pivot_wider(names_from = ontime, values_from = c(dissat, n), names_prefix = "") %>%
rename(
ontime_dissat = dissat_TRUE,
late_dissat = dissat_FALSE,
n_ontime = n_TRUE,
n_late = n_FALSE
) %>%
mutate(
delta = late_dissat - ontime_dissat,
root_cause = case_when(
ontime_dissat >= 0.15 & delta < 0.15 ~ "Product-driven",
ontime_dissat >= 0.15 & delta >= 0.15 ~ "Mixed (Product + Ops)",
ontime_dissat < 0.15 & delta >= 0.15 ~ "Ops/Delivery-driven",
TRUE ~ "Low risk"
)
) %>%
arrange(desc(ontime_dissat))
verdict_tbl %>%
transmute(
Category = category_en,
`On-time Dissatisfied` = percent(ontime_dissat, 0.1),
`Late Dissatisfied` = percent(late_dissat, 0.1),
`Gap (Late - On-time)` = percent(delta, 0.1),
`Root Cause` = root_cause
) %>%
kable(align = "lrrrr") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(which(str_detect(verdict_tbl$root_cause, "Product")), background = "#fde8e8")| Category | On-time Dissatisfied | Late Dissatisfied | Gap (Late - On-time) | Root Cause |
|---|---|---|---|---|
| office_furniture | 22.6% | 57.0% | 34.5% | Mixed (Product + Ops) |
| fashion_male_clothing | 22.0% | 85.7% | 63.7% | Mixed (Product + Ops) |
| fixed_telephony | 21.3% | 72.7% | 51.4% | Mixed (Product + Ops) |
| furniture_decor | 15.2% | 53.6% | 38.5% | Mixed (Product + Ops) |
| bed_bath_table | 15.2% | 56.8% | 41.7% | Mixed (Product + Ops) |
| audio | 14.6% | 71.7% | 57.2% | Ops/Delivery-driven |
| computers_accessories | 14.0% | 54.5% | 40.5% | Ops/Delivery-driven |
| telephony | 12.7% | 50.1% | 37.5% | Ops/Delivery-driven |
| housewares | 12.1% | 50.8% | 38.7% | Ops/Delivery-driven |
| watches_gifts | 11.3% | 58.6% | 47.3% | Ops/Delivery-driven |
| baby | 11.0% | 60.7% | 49.7% | Ops/Delivery-driven |
| sports_leisure | 9.9% | 56.6% | 46.7% | Ops/Delivery-driven |
theme_summary <- comment_tbl %>%
group_by(theme) %>%
summarise(
comments = n(),
dissatisfaction_rate = mean(dissatisfied, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(dissatisfaction_rate))
ggplot(theme_summary %>% filter(theme != "Other/unclear"),
aes(x = comments, y = dissatisfaction_rate,
colour = dissatisfaction_rate, size = comments)) +
geom_point(alpha = 0.85) +
geom_text_repel(aes(label = theme), size = 3.2, max.overlaps = 20,
colour = "grey20", fontface = "bold") +
scale_x_log10(labels = comma_format()) +
scale_y_continuous(labels = percent_format(1), limits = c(0, 1)) +
scale_colour_gradient(low = CLR_ORANGE, high = CLR_RED, guide = "none") +
scale_size_continuous(range = c(4, 14), guide = "none") +
labs(
title = "Complaint Themes: Volume vs Dissatisfaction Intensity",
subtitle = "Bubble size = comment volume | Top-right = highest priority",
x = "Number of Comments (log scale)", y = "Dissatisfaction Rate",
caption = "'Other/unclear' excluded for clarity"
) +
theme_olist()theme_summary %>%
filter(theme != "Other/unclear") %>%
transmute(
Theme = theme,
`Comment Volume` = comma(comments),
`Dissatisfaction Rate` = percent(dissatisfaction_rate, 0.1),
Priority = case_when(
dissatisfaction_rate >= 0.65 ~ "Critical",
dissatisfaction_rate >= 0.45 ~ "High",
TRUE ~ "Medium"
)
) %>%
kable(align = "lrrl") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(
which(theme_summary %>% filter(theme != "Other/unclear") %>% pull(dissatisfaction_rate) >= 0.65),
background = "#fde8e8", bold = TRUE
)| Theme | Comment Volume | Dissatisfaction Rate | Priority |
|---|---|---|---|
| Return/refund process | 808 | 79.2% | Critical |
| Wrong item/mismatch | 1,228 | 68.2% | Critical |
| Product defect/low quality | 1,300 | 63.4% | High |
| Packaging issue | 1,074 | 50.5% | High |
| Seller/service issue | 1,420 | 41.5% | Medium |
| Price/freight concern | 935 | 33.3% | Medium |
| Delivery delay/not arrived | 14,890 | 21.5% | Medium |
theme_cat <- comment_tbl %>%
filter(category_en %in% top12_cats, theme != "Other/unclear") %>%
group_by(category_en, theme) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(category_en) %>%
mutate(share = n / sum(n)) %>%
ungroup()
ggplot(theme_cat,
aes(x = theme, y = reorder(category_en, n), fill = share, size = n)) +
geom_point(shape = 21, colour = "white") +
scale_fill_gradient(low = CLR_ORANGE, high = CLR_RED, labels = percent_format(1)) +
scale_size_continuous(range = c(2, 12)) +
scale_x_discrete(labels = function(x) str_wrap(x, 12)) +
labs(
title = "Complaint Theme Mix by Category",
subtitle = "Bubble size = raw count | Colour = share within category",
x = NULL, y = NULL,
fill = "Share of category\ncomplaints", size = "Comment count",
caption = "Larger + darker = dominant complaint type for that category"
) +
theme_olist() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 9))x_mid <- median(category_core$dissatisfaction_rate)
y_mid <- median(log10(category_core$reviewed_orders))
matrix_data <- category_core %>%
mutate(
log_vol = log10(reviewed_orders),
quadrant = case_when(
dissatisfaction_rate >= x_mid & log_vol >= y_mid ~ "FIX NOW",
dissatisfaction_rate >= x_mid & log_vol < y_mid ~ "INVESTIGATE",
dissatisfaction_rate < x_mid & log_vol >= y_mid ~ "MONITOR",
TRUE ~ "WATCH"
),
q_colour = case_when(
quadrant == "FIX NOW" ~ CLR_RED,
quadrant == "INVESTIGATE" ~ CLR_ORANGE,
quadrant == "MONITOR" ~ CLR_BLUE,
TRUE ~ CLR_GREY
),
label_flag = quadrant %in% c("FIX NOW", "INVESTIGATE") |
risk_score > quantile(risk_score, 0.85, na.rm = TRUE)
)
ggplot(matrix_data,
aes(x = dissatisfaction_rate, y = log_vol,
colour = quadrant, size = reviewed_orders)) +
annotate("rect", xmin = x_mid, xmax = Inf, ymin = y_mid, ymax = Inf, fill = CLR_RED, alpha = 0.05) +
annotate("rect", xmin = x_mid, xmax = Inf, ymin = -Inf, ymax = y_mid, fill = CLR_ORANGE, alpha = 0.05) +
annotate("rect", xmin = -Inf, xmax = x_mid, ymin = y_mid, ymax = Inf, fill = CLR_BLUE, alpha = 0.05) +
annotate("rect", xmin = -Inf, xmax = x_mid, ymin = -Inf, ymax = y_mid, fill = CLR_GREY, alpha = 0.05) +
annotate("text", x = max(matrix_data$dissatisfaction_rate) * 0.97,
y = max(matrix_data$log_vol) * 1.01,
label = "FIX NOW", hjust = 1, fontface = "bold", colour = CLR_RED, size = 4) +
annotate("text", x = max(matrix_data$dissatisfaction_rate) * 0.97,
y = y_mid * 0.97,
label = "INVESTIGATE", hjust = 1, fontface = "bold", colour = CLR_ORANGE, size = 4) +
annotate("text", x = x_mid * 1.02,
y = max(matrix_data$log_vol) * 1.01,
label = "MONITOR", hjust = 0, fontface = "bold", colour = CLR_BLUE, size = 4) +
geom_point(alpha = 0.75) +
geom_text_repel(
data = matrix_data %>% filter(label_flag),
aes(label = str_replace_all(category_en, "_", " ")),
size = 3, fontface = "bold", max.overlaps = 20, colour = "grey20"
) +
geom_vline(xintercept = x_mid, linetype = "dashed", colour = "grey50") +
geom_hline(yintercept = y_mid, linetype = "dashed", colour = "grey50") +
scale_x_continuous(labels = percent_format(1)) +
scale_y_continuous(labels = function(x) comma(10^x)) +
scale_colour_manual(values = c(
"FIX NOW" = CLR_RED, "INVESTIGATE" = CLR_ORANGE,
"MONITOR" = CLR_BLUE, "WATCH" = CLR_GREY
)) +
scale_size_continuous(range = c(2, 10), labels = comma_format()) +
labs(
title = "Category Priority Matrix",
subtitle = "X = dissatisfaction intensity | Y = reviewed order volume",
x = "Dissatisfaction Rate (score <= 2)", y = "Reviewed Orders (log scale)",
colour = "Quadrant", size = "Reviewed orders",
caption = "Dashed lines = median split"
) +
theme_olist()reco_tbl <- matrix_data %>%
filter(quadrant %in% c("FIX NOW", "INVESTIGATE")) %>%
left_join(verdict_tbl %>% select(category_en, root_cause, ontime_dissat), by = "category_en") %>%
left_join(
theme_cat %>%
group_by(category_en) %>%
slice_max(n, n = 1) %>%
select(category_en, top_complaint = theme),
by = "category_en"
) %>%
mutate(
owner = case_when(
str_detect(root_cause, "Product") ~ "Product / Merchant Team",
str_detect(root_cause, "Ops") ~ "Operations / Logistics",
str_detect(root_cause, "Mixed") ~ "Product + Operations (joint)",
TRUE ~ "TBD"
),
recommended_action = case_when(
str_detect(top_complaint, "Wrong item|mismatch") ~
"Audit product listing accuracy (title, photo, spec); enforce SKU mapping checks pre-shipment.",
str_detect(top_complaint, "defect|low quality") ~
"Add incoming quality inspection at seller onboarding; require quality cert for high-volume sellers.",
str_detect(top_complaint, "Packaging") ~
"Define packaging standards by product size/weight; penalise sellers with >10% packaging complaints.",
str_detect(top_complaint, "Delivery") ~
"Review SLA promise-dates; flag sellers with >15% late rate for performance review.",
str_detect(top_complaint, "Return|refund") ~
"Simplify return policy; add one-click return initiation in post-delivery email.",
TRUE ~ "Deep-dive review text sample; run seller scorecard audit."
)
) %>%
arrange(quadrant, desc(dissatisfaction_rate))
reco_tbl %>%
transmute(
Quadrant = quadrant,
Category = str_replace_all(category_en, "_", " "),
`Dissat. Rate` = percent(dissatisfaction_rate, 0.1),
Volume = comma(reviewed_orders),
`Root Cause` = root_cause,
`Top Complaint` = top_complaint,
Owner = owner,
`Recommended Action` = recommended_action
) %>%
kable(align = "llrrlll") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE, font_size = 12) %>%
row_spec(which(reco_tbl$quadrant == "FIX NOW"), background = "#fff0f0", bold = TRUE) %>%
column_spec(8, width = "30em")| Quadrant | Category | Dissat. Rate | Volume | Root Cause | Top Complaint | Owner | Recommended Action |
|---|---|---|---|---|---|---|---|
| FIX NOW | office furniture | 26.3% | 1,273 | Mixed (Product + Ops) | Delivery delay/not arrived | Product / Merchant Team | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | furniture decor | 19.7% | 6,449 | Mixed (Product + Ops) | Delivery delay/not arrived | Product / Merchant Team | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | bed bath table | 19.4% | 9,417 | Mixed (Product + Ops) | Delivery delay/not arrived | Product / Merchant Team | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | computers accessories | 18.8% | 6,689 | Ops/Delivery-driven | Delivery delay/not arrived | Operations / Logistics | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | telephony | 17.2% | 4,199 | Ops/Delivery-driven | Delivery delay/not arrived | Operations / Logistics | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | baby | 17.2% | 2,885 | Ops/Delivery-driven | Delivery delay/not arrived | Operations / Logistics | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | watches gifts | 16.7% | 5,624 | Ops/Delivery-driven | Delivery delay/not arrived | Operations / Logistics | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | garden tools | 16.3% | 3,518 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| FIX NOW | housewares | 16.0% | 5,884 | Ops/Delivery-driven | Delivery delay/not arrived | Operations / Logistics | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| FIX NOW | electronics | 16.0% | 2,550 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| FIX NOW | consoles games | 16.0% | 1,062 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | fashion male clothing | 28.8% | 112 | Mixed (Product + Ops) | Delivery delay/not arrived | Product / Merchant Team | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| INVESTIGATE | fixed telephony | 26.1% | 217 | Mixed (Product + Ops) | Delivery delay/not arrived | Product / Merchant Team | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| INVESTIGATE | audio | 22.0% | 350 | Ops/Delivery-driven | Delivery delay/not arrived | Operations / Logistics | Review SLA promise-dates; flag sellers with >15% late rate for performance review. |
| INVESTIGATE | construction tools safety | 20.6% | 167 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | home confort | 20.3% | 397 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | air conditioning | 19.2% | 253 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | art | 18.7% | 202 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | furniture living room | 18.5% | 422 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | fashion underwear beach | 18.3% | 121 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | home construction | 18.2% | 490 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | costruction tools garden | 17.6% | 194 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | christmas supplies | 17.6% | 128 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | construction tools lights | 17.1% | 244 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | drinks | 16.1% | 297 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| INVESTIGATE | agro industry and commerce | 16.0% | 182 | NA | NA | TBD | Deep-dive review text sample; run seller scorecard audit. |
| # | Finding | Evidence | Action Owner |
|---|---|---|---|
| 1 | Product quality, not delivery, drives most dissatisfaction | 11.7% of on-time orders still score <= 2 | Product / Merchant Team |
| 2 | fashion_male_clothing, office_furniture,
fixed_telephony have highest dissatisfaction rate |
Section 2 ranking | Product Quality |
| 3 | bed_bath_table and furniture_decor are
highest business risk due to volume x rate |
Priority Matrix | Product Quality |
| 4 | Wrong item/mismatch and product defects are most destructive complaint types (63-68% dissat.) | Text theme analysis | Listing accuracy & QC |
| 5 | Return/refund friction amplifies every other problem (79% dissat. rate) | Theme table | Customer Experience |
| 6 | 27% of satisfied customers still wrote complaint text — tracking scores alone underestimates real pain | Mismatch check | Analytics / Monitoring |
North Star recommendation: Build a monthly Category Quality Scorecard combining dissatisfaction rate + on-time dissatisfaction + top complaint theme. Any category crossing 15% on-time dissatisfaction triggers an automatic seller audit.
Goal: Group product categories by their quality-and-delivery profile so that each cluster receives a single targeted intervention.
| Feature | Why it matters |
|---|---|
dissatisfaction_rate |
Overall quality pain |
ontime_dissat |
Product-driven pain (delivery-independent) |
delay_rate |
Operational / logistics pain |
avg_review |
General sentiment |
log_vol |
Business scale |
cluster_input <- category_core %>%
left_join(verdict_tbl %>% select(category_en, ontime_dissat), by = "category_en") %>%
filter(!is.na(ontime_dissat)) %>%
mutate(log_vol = log1p(reviewed_orders)) %>%
select(category_en, dissatisfaction_rate, ontime_dissat, delay_rate, avg_review, log_vol)
cat_names <- cluster_input$category_en
x_scaled <- cluster_input %>%
select(-category_en) %>%
scale()
rownames(x_scaled) <- cat_names
cat("Categories entering clustering:", nrow(x_scaled), "\n")## Categories entering clustering: 12
Normalisation is mandatory for k-means:
scale()converts every feature to mean 0, SD 1 so no variable dominates by sheer magnitude.
set.seed(42)
p_elbow <- fviz_nbclust(x_scaled, kmeans, method = "wss", k.max = 8,
linecolor = CLR_RED) +
labs(title = "Elbow Method (WSS)",
subtitle = "Look for the elbow — point of diminishing returns",
x = "Number of clusters K", y = "Total within-cluster SS") +
theme_olist()
p_sil <- fviz_nbclust(x_scaled, kmeans, method = "silhouette", k.max = 8,
linecolor = CLR_BLUE) +
labs(title = "Average Silhouette Width",
subtitle = "Higher = tighter, better-separated clusters",
x = "Number of clusters K", y = "Average silhouette width") +
theme_olist()
p_elbow + p_sil# Change K_CHOSEN if your plots suggest a different number
K_CHOSEN <- 3
cat("Using K =", K_CHOSEN, "clusters\n")## Using K = 3 clusters
set.seed(42)
km_fit <- kmeans(x_scaled, centers = K_CHOSEN, nstart = 25, iter.max = 100)
cat("k-means variance explained (between-SS / total-SS):",
round(km_fit$betweenss / km_fit$totss, 3), "\n")## k-means variance explained (between-SS / total-SS): 0.826
fviz_cluster(km_fit,
data = x_scaled,
ellipse.type = "convex",
palette = c(CLR_RED, CLR_BLUE, CLR_GREEN, CLR_ORANGE)[1:K_CHOSEN],
ggtheme = theme_olist(),
repel = TRUE) +
labs(title = paste0("k-Means Cluster Map (K = ", K_CHOSEN, ")"),
subtitle = "PCA projection — first 2 components shown")PAM uses real data points as cluster centres (medoids), making it robust to outliers. We use Gower distance so the method handles mixed data types.
# Pass unscaled data — daisy() handles normalisation internally for Gower
gower_dist <- daisy(
cluster_input %>% select(-category_en),
metric = "gower"
)
set.seed(42)
pam_fit <- pam(gower_dist, k = K_CHOSEN, diss = TRUE)
cat("PAM medoids (representative categories):\n")## PAM medoids (representative categories):
## [1] "fixed_telephony" "furniture_decor" "watches_gifts"
fviz_cluster(
list(data = x_scaled, cluster = pam_fit$clustering),
ellipse.type = "convex",
palette = c(CLR_RED, CLR_BLUE, CLR_GREEN, CLR_ORANGE)[1:K_CHOSEN],
ggtheme = theme_olist(),
repel = TRUE
) +
labs(title = paste0("PAM Cluster Map (K = ", K_CHOSEN, ", Gower distance)"),
subtitle = "PCA projection — first 2 components shown")# Helper: compute Rand and Jaccard from two label vectors (no fossil needed)
rand_jaccard <- function(a, b) {
n <- length(a)
# all pairs
pairs <- combn(n, 2)
same_a <- a[pairs[1,]] == a[pairs[2,]]
same_b <- b[pairs[1,]] == b[pairs[2,]]
tp <- sum( same_a & same_b) # both same
tn <- sum(!same_a & !same_b) # both different
fp <- sum(!same_a & same_b) # different in a, same in b
fn <- sum( same_a & !same_b) # same in a, different in b
rand <- (tp + tn) / (tp + tn + fp + fn)
jaccard <- tp / (tp + fp + fn)
list(rand = rand, jaccard = jaccard)
}
km_labels <- km_fit$cluster
pam_labels <- pam_fit$clustering
rj <- rand_jaccard(km_labels, pam_labels)
ari <- adjustedRandIndex(km_labels, pam_labels) # from mclust
index_tbl <- tibble(
Index = c("Rand Index", "Adjusted Rand Index (ARI)", "Jaccard Similarity"),
Value = round(c(rj$rand, ari, rj$jaccard), 3),
Interpretation = c(
"Proportion of pairs that agree between k-means and PAM (1 = identical)",
"Rand corrected for chance; >0.80 = strong agreement",
"Agreement on co-clustered pairs only — a zoom-in on positive matches"
)
)
kable(index_tbl, align = "llr") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Index | Value | Interpretation |
|---|---|---|
| Rand Index | 0.727 | Proportion of pairs that agree between k-means and PAM (1 = identical) |
| Adjusted Rand Index (ARI) | 0.440 | Rand corrected for chance; >0.80 = strong agreement |
| Jaccard Similarity | 0.471 | Agreement on co-clustered pairs only — a zoom-in on positive matches |
Rule of thumb: ARI > 0.80 means the two methods agree strongly — either clustering can be used. ARI < 0.60 signals real disagreement — prefer PAM (more robust, handles mixed data via Gower distance).
profile_tbl <- cluster_input %>%
mutate(
km_cluster = factor(km_fit$cluster, labels = paste0("KM-", seq_len(K_CHOSEN))),
pam_cluster = factor(pam_fit$clustering, labels = paste0("PAM-", seq_len(K_CHOSEN)))
)
km_profile <- profile_tbl %>%
group_by(Cluster = km_cluster) %>%
summarise(
`N categories` = n(),
`Avg Dissatisfaction` = percent(mean(dissatisfaction_rate), 0.1),
`Avg On-time Dissat.` = percent(mean(ontime_dissat), 0.1),
`Avg Delay Rate` = percent(mean(delay_rate), 0.1),
`Avg Review Score` = round(mean(avg_review), 2),
`Example categories` = paste(head(category_en, 3), collapse = ", "),
.groups = "drop"
)
kable(km_profile, caption = "k-Means Cluster Profiles", align = "lrrrrrl") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
column_spec(7, width = "25em")| Cluster | N categories | Avg Dissatisfaction | Avg On-time Dissat. | Avg Delay Rate | Avg Review Score | Example categories |
|---|---|---|---|---|---|---|
| KM-1 | 1 | 22.0% | 14.6% | 12.6% | 3.81 | audio |
| KM-2 | 3 | 27.1% | 22.0% | 6.1% | 3.59 | office_furniture, fashion_male_clothing, fixed_telephony |
| KM-3 | 8 | 17.5% | 12.7% | 7.8% | 3.97 | bed_bath_table, furniture_decor, computers_accessories |
cluster_means <- profile_tbl %>%
group_by(Cluster = km_cluster) %>%
summarise(across(dissatisfaction_rate:log_vol, mean), .groups = "drop") %>%
pivot_longer(-Cluster, names_to = "Feature", values_to = "Mean")
ggplot(cluster_means, aes(x = Feature, y = Cluster, fill = Mean)) +
geom_tile(colour = "white", linewidth = 0.6) +
geom_text(aes(label = round(Mean, 2)), colour = "white", fontface = "bold", size = 3.5) +
scale_fill_gradient2(low = CLR_BLUE, mid = CLR_ORANGE, high = CLR_RED, midpoint = 0) +
scale_x_discrete(labels = c(
dissatisfaction_rate = "Dissatisfaction\nRate",
ontime_dissat = "On-time\nDissat.",
delay_rate = "Delay Rate",
avg_review = "Avg Review\nScore",
log_vol = "Log Volume"
)) +
labs(title = "Cluster Feature Heatmap (k-Means, raw means)",
subtitle = "Red = high value | Blue = low value",
x = NULL, y = "Cluster", fill = "Mean") +
theme_olist()# Adjust labels after inspecting your actual heatmap above
strategy_tbl <- tibble(
Cluster = paste0("KM-", 1:K_CHOSEN),
`Likely Label` = c(
"High-risk: product + delivery pain",
"Moderate: product-driven dissatisfaction",
"Low-risk: operational under-performers"
)[1:K_CHOSEN],
`Recommended Intervention` = c(
"Immediate seller quality audit + return policy fix",
"SKU accuracy review; incoming QC at onboarding",
"SLA promise-date review; logistics partner audit"
)[1:K_CHOSEN],
Owner = c(
"Product + CX (joint)",
"Product / Merchant Team",
"Operations / Logistics"
)[1:K_CHOSEN]
)
kable(strategy_tbl, align = "llll") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(1, background = "#fde8e8", bold = TRUE)| Cluster | Likely Label | Recommended Intervention | Owner |
|---|---|---|---|
| KM-1 | High-risk: product + delivery pain | Immediate seller quality audit + return policy fix | Product + CX (joint) |
| KM-2 | Moderate: product-driven dissatisfaction | SKU accuracy review; incoming QC at onboarding | Product / Merchant Team |
| KM-3 | Low-risk: operational under-performers | SLA promise-date review; logistics partner audit | Operations / Logistics |
North Star integration: Feed cluster membership back into the monthly Category Quality Scorecard. Track whether categories migrate between clusters over time — a move from Cluster 1 to Cluster 3 is a concrete, measurable quality improvement.
Report generated with R 4.5.3 · Dataset: Olist E-Commerce Public Dataset (Kaggle)