This report blends cost behavior modeling, break-even performance tracking, and RFM-based customer segmentation to guide strategic decisions. Insights include fixed/variable cost estimates, production performance vs financial thresholds, and customer-level value distribution.
df <- read.csv("better_sample_data.csv") %>% clean_names()
df$date <- as.Date(df$date)
tibble(df)
## # A tibble: 100 × 6
## customer_id date units_produced delivery_mode item_price total_costs
## <int> <date> <int> <chr> <int> <dbl>
## 1 1013 2023-09-21 175 Pickup 30 3593.
## 2 1012 2023-06-03 255 Courier 30 4835.
## 3 1004 2023-09-26 96 Courier 30 2443.
## 4 1026 2023-01-16 261 Pickup 25 4931.
## 5 1001 2023-12-27 217 Courier 25 4318.
## 6 1023 2023-08-19 201 Third-Party 30 4041.
## 7 1030 2023-02-18 180 Courier 20 3922.
## 8 1027 2023-06-24 236 Third-Party 20 4532.
## 9 1010 2023-07-15 267 Courier 30 4954.
## 10 1013 2023-05-22 233 Pickup 20 4565.
## # … with 90 more rows
model <- lm(total_costs ~ units_produced, data = df)
summary(model)
##
## Call:
## lm(formula = total_costs ~ units_produced, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -213.13 -76.91 -14.55 71.53 222.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1015.6223 27.4439 37.01 <2e-16 ***
## units_produced 15.0233 0.1467 102.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 101.8 on 98 degrees of freedom
## Multiple R-squared: 0.9907, Adjusted R-squared: 0.9906
## F-statistic: 1.049e+04 on 1 and 98 DF, p-value: < 2.2e-16
fixed_cost <- coef(model)[1]
variable_cost <- coef(model)[2]
cat("Estimated Fixed Cost: $", round(fixed_cost, 2), "\n")
## Estimated Fixed Cost: $ 1015.62
cat("Variable Cost per Unit: $", round(variable_cost, 2), "\n")
## Variable Cost per Unit: $ 15.02
ggplot(df, aes(x = units_produced, y = total_costs)) +
geom_point(color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "darkred") +
labs(title = "Regression: Total Cost vs Units Produced", x = "Units", y = "Total Cost")
df <- df %>%
mutate(
revenue = units_produced * item_price,
profit = revenue - total_costs
)
tibble(df)
## # A tibble: 100 × 8
## customer_id date units_produced delive…¹ item_…² total…³ revenue profit
## <int> <date> <int> <chr> <int> <dbl> <int> <dbl>
## 1 1013 2023-09-21 175 Pickup 30 3593. 5250 1657.
## 2 1012 2023-06-03 255 Courier 30 4835. 7650 2815.
## 3 1004 2023-09-26 96 Courier 30 2443. 2880 437.
## 4 1026 2023-01-16 261 Pickup 25 4931. 6525 1594.
## 5 1001 2023-12-27 217 Courier 25 4318. 5425 1107.
## 6 1023 2023-08-19 201 Third-P… 30 4041. 6030 1989.
## 7 1030 2023-02-18 180 Courier 20 3922. 3600 -322.
## 8 1027 2023-06-24 236 Third-P… 20 4532. 4720 188.
## 9 1010 2023-07-15 267 Courier 30 4954. 8010 3056.
## 10 1013 2023-05-22 233 Pickup 20 4565. 4660 94.5
## # … with 90 more rows, and abbreviated variable names ¹​delivery_mode,
## # ²​item_price, ³​total_costs
# Total fixed cost from regression
total_fixed_cost <- fixed_cost
# Calculate total units to scale fixed cost across
total_units <- sum(df$units_produced)
# Scaled fixed cost per unit
fixed_cost_per_unit <- total_fixed_cost / total_units
# Add accounting vs contribution profit
df <- df %>%
mutate(
variable_cost_component = variable_cost * units_produced,
# Traditional accounting view (original)
profit_accounting = revenue - (total_costs),
# Contribution view: allocate fixed cost per unit
fixed_cost_scaled = fixed_cost_per_unit * units_produced,
total_cost_scaled = variable_cost_component + fixed_cost_scaled,
profit_contribution = revenue - total_cost_scaled
)
summary(df %>% select(profit_accounting, profit_contribution))
## profit_accounting profit_contribution
## Min. :-795.13 Min. : 250.8
## 1st Qu.: -92.54 1st Qu.: 951.7
## Median : 412.14 Median :1394.9
## Mean : 713.84 Mean :1719.3
## 3rd Qu.:1377.61 3rd Qu.:2420.1
## Max. :3425.28 Max. :4386.0
df_long <- df %>%
select(date, customer_id, profit_accounting, profit_contribution) %>%
pivot_longer(cols = starts_with("profit_"), names_to = "profit_type", values_to = "profit")
ggplot(df_long, aes(x = date, y = profit, color = profit_type)) +
geom_line(alpha = 0.6) +
labs(title = "📈 Profit View: Accounting vs Contribution Margin",
x = "Date", y = "Profit", color = "Profit Type") +
scale_color_manual(values = c("profit_accounting" = "firebrick", "profit_contribution" = "forestgreen")) +
theme_minimal()
daily_be <- round(fixed_cost / (mean(df$item_price) - variable_cost))
df_daily <- df %>%
group_by(date) %>%
summarise(units = sum(units_produced)) %>%
mutate(status = ifelse(units >= daily_be, "Above BE", "Below BE"))
ggplot(df_daily, aes(x = date, y = units, fill = status)) +
geom_col() +
geom_hline(yintercept = daily_be, linetype = "dashed", color = "red") +
scale_fill_manual(values = c("Above BE" = "forestgreen", "Below BE" = "tomato")) +
labs(title = "📆 Daily Production vs Daily Break-Even", y = "Units", x = "Date")
df_monthly <- df %>%
mutate(month = floor_date(date, "month")) %>%
group_by(month) %>%
summarise(
units = sum(units_produced),
work_days = n_distinct(date)
) %>%
mutate(
monthly_be_target = daily_be * work_days,
performance_ratio = round(units / monthly_be_target * 100, 1),
status = ifelse(units >= monthly_be_target, "Above BE", "Below BE")
)
ggplot(df_monthly, aes(x = month)) +
geom_col(aes(y = units, fill = status), width = 20) +
geom_line(aes(y = monthly_be_target), color = "red", size = 1.2) +
geom_text(aes(y = units + 150, label = units), size = 4) +
labs(title = "📅 Monthly Production vs Dynamic Break-Even",
y = "Units Produced", x = "Month") +
scale_fill_manual(
name = "Performance",
values = c("Above BE" = "forestgreen", "Below BE" = "firebrick"))+
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
theme_minimal(base_size = 14)
unit_margin <- mean(df$item_price) - variable_cost
df_customer <- df %>%
group_by(customer_id) %>%
summarise(units = sum(units_produced)) %>%
mutate(
profit = units * unit_margin,
status = ifelse(profit >= fixed_cost, "Break-Even Reached", "Below BE"),
surplus = profit - fixed_cost
)
tibble(df_customer)
## # A tibble: 29 × 5
## customer_id units profit status surplus
## <int> <int> <dbl> <chr> <dbl>
## 1 1001 326 3236. Break-Even Reached 2220.
## 2 1002 122 1211. Break-Even Reached 195.
## 3 1003 519 5152. Break-Even Reached 4136.
## 4 1004 649 6442. Break-Even Reached 5427.
## 5 1005 906 8994. Break-Even Reached 7978.
## 6 1006 163 1618. Break-Even Reached 602.
## 7 1008 801 7951. Break-Even Reached 6936.
## 8 1009 469 4656. Break-Even Reached 3640.
## 9 1010 267 2650. Break-Even Reached 1635.
## 10 1011 1150 11416. Break-Even Reached 10400.
## # … with 19 more rows
ggplot(df_customer, aes(x = reorder(as.character(customer_id), surplus), y = surplus, fill = status)) +
geom_col() +
coord_flip() +
scale_fill_manual(values = c("Break-Even Reached" = "forestgreen", "Below BE" = "firebrick")) +
labs(title = "💼 Customer-Level Surplus Over Break-Even", x = "Customer", y = "Surplus")
rfm <- df %>%
group_by(customer_id) %>%
summarise(
recency = as.numeric(Sys.Date() - max(date)),
frequency = n(),
monetary = sum(revenue)
) %>%
mutate(
recency_score = ntile(-recency, 5),
frequency_score = ntile(frequency, 5),
monetary_score = ntile(monetary, 5),
rfm_score = recency_score + frequency_score + monetary_score
)
tibble(rfm)
## # A tibble: 29 × 8
## customer_id recency frequency monetary recency_score freque…¹ monet…² rfm_s…³
## <int> <dbl> <int> <int> <int> <int> <int> <int>
## 1 1001 549 2 8695 5 1 2 8
## 2 1002 594 1 2440 4 1 1 6
## 3 1003 557 4 14160 5 3 3 11
## 4 1004 570 4 16910 4 3 3 10
## 5 1005 668 4 19580 2 4 4 10
## 6 1006 865 2 4360 1 1 1 3
## 7 1008 550 4 19890 5 4 4 13
## 8 1009 630 2 14070 2 2 3 7
## 9 1010 714 1 8010 1 1 1 3
## 10 1011 682 5 27650 2 4 5 11
## # … with 19 more rows, and abbreviated variable names ¹​frequency_score,
## # ²​monetary_score, ³​rfm_score
top_customers <- rfm %>% filter(rfm_score >= 13)
top_contrib <- df %>% filter(customer_id %in% top_customers$customer_id)
units_top <- sum(top_contrib$units_produced)
break_even_target <- 260
pct_of_be <- round(100 * units_top / break_even_target, 1)
cat("Top-tier customers contributed", units_top, "units -", pct_of_be, "% of break-even target.\n")
## Top-tier customers contributed 5519 units - 2122.7 % of break-even target.
ggplot(rfm, aes(x = rfm_score)) +
geom_bar(fill = "steelblue") +
labs(title = "RFM Score Distribution", x = "RFM Score", y = "Customer Count")
top_contrib %>%
group_by(customer_id) %>%
summarise(total_units = sum(units_produced)) %>%
top_n(10) %>%
ggplot(aes(x = reorder(as.character(customer_id), total_units), y = total_units)) +
geom_col(fill = "goldenrod") +
coord_flip() +
labs(title = "Top 10 Customers by Units", x = "Customer ID", y = "Units Produced")
df_long <- df %>%
group_by(customer_id) %>%
summarise(
revenue = sum(revenue),
profit_accounting = sum(profit_accounting),
profit_contribution = sum(profit_contribution)
) %>%
pivot_longer(cols = starts_with("profit_"), names_to = "ProfitType", values_to = "Profit") %>%
filter(revenue > 0)
ggplot(df_long, aes(x = reorder(customer_id, Profit), y = Profit, fill = ProfitType)) +
geom_col(position = "dodge") +
coord_flip() +
scale_fill_manual(values = c("profit_accounting" = "tomato", "profit_contribution" = "forestgreen"),
labels = c("Accounting Profit", "Contribution Margin")) +
labs(title = "💼 Customer-Level Profit Comparison",
x = "Customer ID", y = "Total Profit", fill = "Profit View") +
theme_minimal(base_size = 13)
df_delivery_summary <- df %>%
group_by(delivery_mode) %>%
summarise(
avg_margin_accounting = mean(profit_accounting / revenue, na.rm = TRUE) * 100,
avg_margin_contribution = mean(profit_contribution / revenue, na.rm = TRUE) * 100
) %>%
pivot_longer(cols = starts_with("avg_margin"), names_to = "MarginType", values_to = "Margin")
ggplot(df_delivery_summary, aes(x = reorder(delivery_mode, Margin), y = Margin, fill = MarginType)) +
geom_col(position = "dodge", width = 0.6) +
scale_fill_manual(values = c("avg_margin_accounting" = "tomato", "avg_margin_contribution" = "seagreen"),
labels = c("Accounting", "Contribution")) +
labs(title = "🚚 Average Profit Margin by Delivery Mode",
x = "Delivery Mode", y = "Average Margin (%)", fill = "Margin Type") +
theme_minimal(base_size = 13)
-Which customers are profit-positive under both models — and which ones flip from loss to gain when fixed costs are scaled
-Where pricing, cost control, or customer strategy might need attention - The operation has vastly exceeded break-even, both in aggregate and through top-tier customers. - RFM segmentation reveals concentrated customer value. - Daily and monthly break-even tracking can guide real-time ops and strategic decisions. - Recommend doubling down on high-tier segments and maintaining production efficiency.
# Reshape for easier boxplot comparison
df_long_profit <- df %>%
pivot_longer(cols = c(profit_accounting, profit_contribution),
names_to = "profit_type", values_to = "profit_value")
# Box Plot to identify outliers
ggplot(df_long_profit, aes(x = profit_type, y = profit_value, fill = profit_type)) +
geom_boxplot(outlier.colour = "red", outlier.shape = 4, alpha = 0.6) +
scale_fill_manual(values = c("profit_accounting" = "salmon", "profit_contribution" = "seagreen")) +
labs(title = "📦 Profit Distribution by Type with Outliers Highlighted",
x = "Profit Type", y = "Profit per Order") +
theme_minimal(base_size = 13)
# Calculate bounds for each profit type
outlier_bounds <- df_long_profit %>%
group_by(profit_type) %>%
summarise(
Q1 = quantile(profit_value, 0.25, na.rm = TRUE),
Q3 = quantile(profit_value, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 1.5 * IQR,
upper = Q3 + 1.5 * IQR,
.groups = "drop"
)
# Join and flag
df_outliers <- df_long_profit %>%
left_join(outlier_bounds, by = "profit_type") %>%
filter(profit_value < lower | profit_value > upper)
# Preview outliers
head(df_outliers)
## # A tibble: 0 × 18
## # … with 18 variables: customer_id <int>, date <date>, units_produced <int>,
## # delivery_mode <chr>, item_price <int>, total_costs <dbl>, revenue <int>,
## # profit <dbl>, variable_cost_component <dbl>, fixed_cost_scaled <dbl>,
## # total_cost_scaled <dbl>, profit_type <chr>, profit_value <dbl>, Q1 <dbl>,
## # Q3 <dbl>, IQR <dbl>, lower <dbl>, upper <dbl>
-This analysis reveals that many of the negative profit values are driven by outlier transactions, especially under the traditional accounting model. The contribution margin approach narrows the spread, reducing volatility from small-volume orders and disproportionate fixed cost assignment. These insights suggest that refining cost allocation and filtering edge cases can improve performance tracking clarity.