1. Configuración inicial
2. Limpieza y estandarización de datos
3. Serie diaria y completación de fechas
4. Gráfico de serie diaria por recolector
5. Boxplot por día de semana (días activos)
# Box plot of daily kg by weekday, restricted to active days (kg > 0),
# with one box per collector (`recolector`) inside each weekday.
# Fix: geom_boxplot() spells the outlier transparency argument
# `outlier.alpha`; the former `outlier_alpha` was silently ignored (with an
# "unknown parameters" warning), so outliers were drawn fully opaque.
ggplot(daily |> filter(kg > 0), aes(dow, kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Distribucion de kg por dia de semana (dias activos)",
    x = "",
    y = "Kg/dia"
  )

6. Boxplot avanzado por día de semana (dispersión + totales)
library(dplyr)
library(ggplot2)
library(scales)
## Warning: package 'scales' was built under R version 4.2.3
# 1) Active-day rows only (kg > 0); the weekday factor is rebuilt with an
#    explicit Monday-to-Sunday level order so the x axis is sorted.
base <- daily |>
  filter(kg > 0) |>
  mutate(
    recolector = factor(recolector),
    dow = factor(
      dow,
      levels = c(
        "lunes", "martes", "miércoles", "jueves",
        "viernes", "sábado", "domingo"
      )
    )
  )

# 2) Vertical spacing between stacked labels, as a fraction of the kg range
#    (raise the factor to 0.10 if the labels end up too close).
delta <- diff(range(base[["kg"]], na.rm = TRUE)) * 0.08

# 3) Total kg per weekday and collector, plus the label position.
#    `upper_whisker` is the highest Q3 + 1.5 * IQR among the collectors of a
#    weekday, so every label of that weekday starts from the same reference.
tot_dow_rec <- base |>
  group_by(dow, recolector) |>
  summarise(
    total_kg = sum(kg, na.rm = TRUE),
    q3 = quantile(kg, 0.75, na.rm = TRUE),
    iqr = IQR(kg, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(dow, recolector) |>
  group_by(dow) |>
  mutate(
    upper_whisker = max(q3 + 1.5 * iqr, na.rm = TRUE),
    # The first collector of each weekday gets the largest offset, so the
    # labels of one weekday are staggered instead of overlapping.
    offset = (n() - row_number() + 1) * delta,
    y_lab = upper_whisker + offset,
    # Label in tonnes; for kg use:
    #   lab = paste0(comma(round(total_kg, 0)), " kg")
    lab = paste0(number(total_kg / 1000, accuracy = 0.01), " t")
  ) |>
  ungroup()

# 4) Plot: dodged box plots, the raw observations jittered on top of each
#    box, and the per-collector total printed above every weekday.
ggplot(data = base, mapping = aes(x = dow, y = kg, fill = recolector)) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    outlier.alpha = 0.25
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.8,
    alpha = 0.15
  ) +
  geom_label(
    data = tot_dow_rec,
    mapping = aes(x = dow, y = y_lab, label = lab, group = recolector),
    inherit.aes = FALSE,
    position = position_dodge(width = 0.75),
    fill = "white",
    alpha = 0.85,
    size = 3.0,
    label.size = 0
  ) +
  labs(
    title = "Distribución de kg por día de semana (días activos) + dispersión real + suma anual",
    x = "",
    y = "Kg/día"
  ) +
  # Extra head-room at the top so the labels are not cut off.
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.25))) +
  coord_cartesian(clip = "off") +
  theme(axis.text.x = element_text(angle = 0))

7. Heatmap calendario (total diario)
# Calendar heat map: one tile per (day of month, month) coloured by the
# `kg_total` column of `daily_total`.
daily_total |>
  mutate(
    dia = day(fecha),
    mes = month(fecha, label = TRUE, abbr = FALSE)
  ) |>
  ggplot(mapping = aes(x = dia, y = mes, fill = kg_total)) +
  geom_tile() +
  labs(
    title = "Mapa de calor (kg total por dia)",
    x = "Dia del mes",
    y = "Mes"
  )

8. Boxplots por día del mes y por mes (días activos)
# Add the day of the month to every daily record.
daily2 <- daily |>
  mutate(dia_mes = day(fecha))

# Box plot of kg by day of the month (active days only), one box per
# collector.
# Fix: the argument is `outlier.alpha`; `outlier_alpha` was ignored by
# geom_boxplot() with an "unknown parameters" warning.
ggplot(
  daily2 |> filter(kg > 0),
  aes(x = factor(dia_mes), y = kg, fill = recolector)
) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Boxplot de kg por día del mes (solo días activos)",
    x = "Día del mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Rebuild `daily2` with the month name (full label) of every daily record.
daily2 <- daily |>
  mutate(mes = month(fecha, label = TRUE, abbr = FALSE))

# Box plot of kg by month (active days only), one box per collector.
# Fix: the argument is `outlier.alpha`; `outlier_alpha` was ignored by
# geom_boxplot() with an "unknown parameters" warning.
ggplot(daily2 |> filter(kg > 0), aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Boxplot de kg por mes (solo días activos)",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Monthly box plot (active days only) with the raw observations on top.
# Fixes:
#  * `outlier_alpha` -> `outlier.alpha` (the former was ignored with a
#    warning).
#  * geom_jitter() was not dodged, so the points of both collectors were
#    centred on the month instead of on their own box, and its default
#    vertical jitter moved the kg values. position_jitterdodge() aligns the
#    points with the dodged boxes and only jitters horizontally.
ggplot(daily2 |> filter(kg > 0), aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(
    outlier.alpha = 0.2,
    position = position_dodge(width = 0.75)
  ) +
  geom_point(
    alpha = 0.15, size = 0.8,
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75)
  ) +
  labs(
    title = "Kg por mes (días activos) con dispersión real",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

9. Boxplots adicionales (por recolector y paneles)
# Active days only (kg > 0), annotated with the day of the month and the
# full month label; shared by the plots that follow.
daily_act <- daily |>
  filter(kg > 0) |>
  mutate(
    dia_mes = day(fecha),
    mes = month(fecha, label = TRUE, abbr = FALSE)
  )
# Box plot of kg by day of the month for active days, one box per collector.
# Fix: the argument is `outlier.alpha`; `outlier_alpha` was ignored by
# geom_boxplot() with an "unknown parameters" warning.
ggplot(daily_act, aes(x = factor(dia_mes), y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Kg por día del mes (solo días activos) - por recolector",
    x = "Día del mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Box plot of kg by month for active days, one box per collector.
# Fix: the argument is `outlier.alpha`; `outlier_alpha` was ignored by
# geom_boxplot() with an "unknown parameters" warning.
ggplot(daily_act, aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Kg por mes (solo días activos) - por recolector",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Box plot of kg by month for active days, one box per collector.
# NOTE(review): this chunk repeats the immediately preceding monthly plot
# (same data, mapping and title) - confirm whether both are needed.
# Fix: the argument is `outlier.alpha`; `outlier_alpha` was ignored by
# geom_boxplot() with an "unknown parameters" warning.
ggplot(daily_act, aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Kg por mes (solo días activos) - por recolector",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Monthly box plot for active days, one stacked panel per collector; each
# panel has its own y scale (`scales = "free_y"`).
# Fix: the argument is `outlier.alpha`; `outlier_alpha` was ignored by
# geom_boxplot() with an "unknown parameters" warning.
ggplot(daily_act, aes(x = mes, y = kg)) +
  geom_boxplot(outlier.alpha = 0.3) +
  facet_wrap(~ recolector, ncol = 1, scales = "free_y") +
  labs(
    title = "Kg por mes (solo días activos) - panel por recolector",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

10. Heatmaps por recolector (promedios en días activos)
library(dplyr)
library(lubridate)
library(ggplot2)
# Mean kg on active days for every (collector, weekday, day of month) cell;
# `n` keeps the number of days behind each mean.
hm1 <- daily |>
  mutate(
    dia_mes = day(fecha),
    dow = wday(fecha, label = TRUE, abbr = FALSE, week_start = 1)
  ) |>
  filter(kg > 0) |>
  group_by(recolector, dow, dia_mes) |>
  summarise(
    kg_prom = mean(kg),
    n = n(),
    .groups = "drop"
  )

# Heat map weekday x day of month, one stacked panel per collector.
ggplot(data = hm1, mapping = aes(x = dia_mes, y = dow, fill = kg_prom)) +
  geom_tile() +
  facet_wrap(~ recolector, ncol = 1) +
  # One tick per possible day of the month.
  scale_x_continuous(breaks = 1:31) +
  labs(
    title = "Heatmap: día de semana vs día del mes (promedio kg, días activos)",
    x = "Día del mes",
    y = "Día de la semana",
    fill = "Kg prom"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Mean kg on active days for every (collector, weekday, month) cell;
# `n` keeps the number of days behind each mean.
hm2 <- daily |>
  mutate(
    mes = month(fecha, label = TRUE, abbr = FALSE),
    dow = wday(fecha, label = TRUE, abbr = FALSE, week_start = 1)
  ) |>
  filter(kg > 0) |>
  group_by(recolector, dow, mes) |>
  summarise(
    kg_prom = mean(kg),
    n = n(),
    .groups = "drop"
  )

# Heat map weekday x month, one stacked panel per collector.
ggplot(data = hm2, mapping = aes(x = mes, y = dow, fill = kg_prom)) +
  geom_tile() +
  facet_wrap(~ recolector, ncol = 1) +
  labs(
    title = "Heatmap: día de semana vs mes (promedio kg, días activos)",
    x = "Mes",
    y = "Día de la semana",
    fill = "Kg prom"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

11. KPIs operativos
# Operational KPIs per collector. "Active" means kg > 0; the *_act columns
# are computed over active days only, *_cal over every row of the collector.
kpis <- daily |>
  # Helper flag used only inside summarise(); it is not part of the result.
  mutate(es_activo = kg > 0) |>
  group_by(recolector) |>
  summarise(
    kg_total = sum(kg),
    dias_activos = sum(es_activo),
    dias_inactivos = sum(kg == 0),
    kg_prom_dia_cal = mean(kg),
    kg_prom_dia_act = mean(kg[es_activo]),
    kg_mediana_act = median(kg[es_activo]),
    p95_act = quantile(kg[es_activo], 0.95),
    viajes_totales = sum(viajes),
    # Same ratio as sum(kg) / sum(viajes), reusing the totals above.
    kg_prom_viaje = kg_total / viajes_totales,
    .groups = "drop"
  )
kpis
## # A tibble: 2 × 10
## recolector kg_total dias_activos dias_inactivos kg_prom_dia_cal
## <fct> <dbl> <int> <int> <dbl>
## 1 5 2471050 198 167 6770
## 2 8 3294000 292 73 9025.
## # ℹ 5 more variables: kg_prom_dia_act <dbl>, kg_mediana_act <dbl>,
## # p95_act <dbl>, viajes_totales <int>, kg_prom_viaje <dbl>
library(dplyr)
# Efficiency KPIs per collector. "Active" means kg > 0.
kpi_eficiencia <- daily |>
  # Helper flag used only inside summarise(); it is not part of the result.
  mutate(es_activo = kg > 0) |>
  group_by(recolector) |>
  summarise(
    kg_total = sum(kg),
    dias_calendario = n(),
    dias_activos = sum(es_activo),
    pct_dias_activos = mean(es_activo),
    viajes_totales = sum(viajes),
    kg_por_viaje = kg_total / viajes_totales,
    kg_por_dia_activo = mean(kg[es_activo]),
    viajes_por_dia_activo = mean(viajes[es_activo]),
    # Coefficient of variation of kg over active days (sd / mean).
    cv_kg_activo = sd(kg[es_activo]) / kg_por_dia_activo,
    # Share of active days with two or more trips.
    pct_doble_viaje = mean(viajes[es_activo] >= 2),
    p50_act = median(kg[es_activo]),
    p95_act = quantile(kg[es_activo], 0.95),
    .groups = "drop"
  )
kpi_eficiencia
## # A tibble: 2 × 13
## recolector kg_total dias_calendario dias_activos pct_dias_activos
## <fct> <dbl> <int> <int> <dbl>
## 1 5 2471050 365 198 0.542
## 2 8 3294000 365 292 0.8
## # ℹ 8 more variables: viajes_totales <int>, kg_por_viaje <dbl>,
## # kg_por_dia_activo <dbl>, viajes_por_dia_activo <dbl>, cv_kg_activo <dbl>,
## # pct_doble_viaje <dbl>, p50_act <dbl>, p95_act <dbl>
13. Resumen del efecto (diferencia diaria)
# One-row summary of the daily difference `dif_kg` over the rows of
# `both_days`: sample size, median, spread (IQR and quartiles) and the share
# of days with a positive difference.
resumen_efecto <- summarise(
  both_days,
  n = n(),
  mediana_dif = median(dif_kg),
  iqr_dif = IQR(dif_kg),
  p25 = quantile(dif_kg, 0.25),
  p75 = quantile(dif_kg, 0.75),
  prop_r5_mayor = mean(dif_kg > 0)
)
resumen_efecto
## # A tibble: 1 × 6
## n mediana_dif iqr_dif p25 p75 prop_r5_mayor
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 164 1355 3558. -645 2912. 0.689
14. Diferencia vs fecha y Bland–Altman
library(ggplot2)
# Time series of the daily difference, with a horizontal reference line at
# zero drawn underneath the series.
ggplot(data = both_days, mapping = aes(x = fecha, y = dif_kg)) +
  geom_hline(yintercept = 0) +
  geom_line(linewidth = 0.6) +
  labs(
    title = "Diferencia diaria (R5 - R8) en días compartidos",
    x = "Fecha",
    y = "Diferencia (kg)"
  )

# Bland-Altman style scatter: daily difference against the daily mean
# (`mean_kg`), with a horizontal reference line at zero.
ggplot(data = both_days, mapping = aes(x = mean_kg, y = dif_kg)) +
  geom_hline(yintercept = 0) +
  geom_point(alpha = 0.4) +
  labs(
    title = "Bland-Altman: Diferencia vs Promedio del día",
    x = "Promedio del día (kg)",
    y = "Diferencia (kg)"
  )

15. Pruebas adicionales (log-ratio) y modelo lineal con calendario
# One-sample Wilcoxon signed-rank test of the log-ratio against 0, with a
# confidence interval; `exact = FALSE` requests the normal approximation.
# The `both_days$log_ratio` expression is kept as-is because it is what the
# printed "data:" line shows.
w_log <- wilcox.test(
  x = both_days$log_ratio,
  mu = 0,
  exact = FALSE,
  conf.int = TRUE
)
w_log
##
## Wilcoxon signed rank test with continuity correction
##
## data: both_days$log_ratio
## V = 9672, p-value = 1.823e-06
## alternative hypothesis: true location is not equal to 0
## 95 percent confidence interval:
## 0.06981843 0.15096447
## sample estimates:
## (pseudo)median
## 0.113024
library(lubridate)
# Add calendar covariates (month and weekday, Monday first) to the days
# present in `both_days`.
both_days_cal <- both_days |>
  mutate(
    dow = wday(fecha, label = TRUE, abbr = FALSE, week_start = 1),
    mes = month(fecha, label = TRUE, abbr = FALSE)
  )

# Linear model of the daily difference on month and weekday.
# With `label = TRUE` lubridate returns ordered factors, so lm() encodes
# them with polynomial contrasts: the coefficients are the .L / .Q / .C / ^k
# trend terms seen in the summary, not per-month or per-weekday effects.
m_cal <- lm(formula = dif_kg ~ mes + dow, data = both_days_cal)
summary(m_cal)
##
## Call:
## lm(formula = dif_kg ~ mes + dow, data = both_days_cal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13417.4 -1644.4 131.1 2111.4 7944.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 660.63 425.73 1.552 0.122884
## mes.L -279.31 1488.54 -0.188 0.851421
## mes.Q -1552.04 1320.22 -1.176 0.241671
## mes.C -3372.00 2028.41 -1.662 0.098582 .
## mes^4 -4504.00 1176.68 -3.828 0.000191 ***
## mes^5 -4700.66 1614.43 -2.912 0.004160 **
## mes^6 -2847.13 2053.22 -1.387 0.167657
## mes^7 -521.90 1140.84 -0.457 0.648015
## mes^8 1751.92 1624.91 1.078 0.282738
## mes^9 1428.21 2376.60 0.601 0.548806
## mes^10 -678.11 2037.95 -0.333 0.739809
## mes^11 -1095.85 1179.48 -0.929 0.354372
## dow.L 394.33 2096.59 0.188 0.851075
## dow.Q -1345.02 1999.56 -0.673 0.502229
## dow.C 517.56 1588.31 0.326 0.744998
## dow^4 -1086.42 1077.41 -1.008 0.314950
## dow^5 -91.58 750.42 -0.122 0.903034
## dow^6 -1091.61 647.30 -1.686 0.093854 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3331 on 146 degrees of freedom
## Multiple R-squared: 0.3261, Adjusted R-squared: 0.2476
## F-statistic: 4.156 on 17 and 146 DF, p-value: 8.83e-07
16. Boxplot mensual con totales por recolector (etiquetas)
library(dplyr)
library(ggplot2)
library(scales)
# Active-day rows of `daily2` (expects the `mes` column to be present).
base <- daily2 |> filter(kg > 0)

# Vertical spacing between stacked labels: 6 % of the observed kg range.
rango <- diff(range(base[["kg"]], na.rm = TRUE))
delta <- rango * 0.06

# Monthly total per collector plus the label position. `ymax` is the largest
# daily kg of the month over all collectors, so the labels sit above every
# point of that month.
tot_mensual_rec <- base |>
  group_by(mes, recolector) |>
  summarise(
    total_kg = sum(kg, na.rm = TRUE),
    ymax = max(kg, na.rm = TRUE),
    .groups = "drop"
  ) |>
  # Promote the per-collector maximum to the maximum of the whole month.
  group_by(mes) |>
  mutate(ymax = max(ymax)) |>
  ungroup() |>
  mutate(
    # Collector 5 is placed two steps above the month maximum, any other
    # collector one step, which staggers the two labels.
    y_lab = ymax + delta * (1 + (recolector == 5)),
    lab = paste0(number(total_kg / 1000, accuracy = 0.01), " t")
  )

# Dodged monthly box plots, raw observations jittered over each box, and the
# monthly total (in tonnes) per collector above each month.
ggplot(data = base, mapping = aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(
    outlier.alpha = 0.2,
    position = position_dodge(width = 0.75)
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.8,
    alpha = 0.15
  ) +
  geom_label(
    data = tot_mensual_rec,
    mapping = aes(x = mes, y = y_lab, label = lab, group = recolector),
    inherit.aes = FALSE,
    position = position_dodge(width = 0.75),
    fill = "white",
    alpha = 0.80,
    size = 3.2,
    label.size = 0
  ) +
  labs(
    title = "Kg por mes (días activos) con dispersión real + total mensual (por recolector)",
    x = "Mes",
    y = "Kg/día"
  ) +
  # Extra room at the top for the labels and on the sides of the x axis.
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.30))) +
  scale_x_discrete(expand = expansion(add = 0.6)) +
  coord_cartesian(clip = "off") +
  theme(
    plot.margin = margin(10, 10, 10, 30),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

17. Boxplot por día del mes con totales por recolector (etiquetas)
library(dplyr)
library(ggplot2)
library(lubridate)
library(scales)
# Active-day rows with the day of the month as a factor over the fixed
# levels 1..31, and the collector as a factor.
daily2 <- daily |>
  mutate(dia_mes = day(fecha)) |>
  filter(kg > 0) |>
  mutate(
    dia_mes = factor(dia_mes, levels = 1:31),
    recolector = factor(recolector)
  )

# Vertical spacing between stacked labels: 6 % of the observed kg range.
delta <- diff(range(daily2[["kg"]], na.rm = TRUE)) * 0.06

# Total kg per day of month and collector, plus the label position.
# `upper_whisker` is the highest Q3 + 1.5 * IQR among the collectors of a
# day, so every label of that day starts from the same reference.
tot_dia_mes_rec <- daily2 |>
  group_by(dia_mes, recolector) |>
  summarise(
    total_kg = sum(kg, na.rm = TRUE),
    q3 = quantile(kg, 0.75, na.rm = TRUE),
    iqr = IQR(kg, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(dia_mes, recolector) |>
  group_by(dia_mes) |>
  mutate(
    upper_whisker = max(q3 + 1.5 * iqr, na.rm = TRUE),
    # The first collector of each day gets the largest offset, so the labels
    # of one day are staggered instead of overlapping.
    offset = (n() - row_number() + 1) * delta,
    y_lab = upper_whisker + offset,
    lab = paste0(scales::number(total_kg / 1000, accuracy = 0.01), " t")
  ) |>
  ungroup()

# Dodged box plots per day of month, raw observations jittered over each
# box, and the per-collector total (in tonnes) above every day.
ggplot(data = daily2, mapping = aes(x = dia_mes, y = kg, fill = recolector)) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    outlier.alpha = 0.25
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.7,
    alpha = 0.15
  ) +
  geom_label(
    data = tot_dia_mes_rec,
    mapping = aes(x = dia_mes, y = y_lab, label = lab, group = recolector),
    inherit.aes = FALSE,
    position = position_dodge(width = 0.75),
    fill = "white",
    alpha = 0.85,
    size = 2.5,
    label.size = 0
  ) +
  labs(
    title = "Boxplot de kg por día del mes (solo días activos) + dispersión + total por día (por recolector)",
    x = "Día del mes",
    y = "Kg/día"
  ) +
  # Extra head-room at the top so the labels are not cut off.
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.30))) +
  coord_cartesian(clip = "off") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

18. Boxplot por día del mes (dispersión real)
# Box plot of kg by day of the month (active days only) with the raw
# observations jittered over each collector's box; no total labels.
ggplot(
  data = daily2 |> filter(kg > 0),
  mapping = aes(x = factor(dia_mes), y = kg, fill = recolector)
) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    outlier.alpha = 0.25
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.7,
    alpha = 0.15
  ) +
  labs(
    title = "Boxplot de kg por día del mes (solo días activos) + dispersión real",
    x = "Día del mes",
    y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
