1. Configuración inicial

2. Limpieza y estandarización de datos

3. Serie diaria y completación de fechas

4. Gráfico de serie diaria por recolector

5. Boxplot por día de semana (días activos)

# Boxplot of kg by day of week, restricted to active days (kg > 0).
# The outlier transparency argument of geom_boxplot() is `outlier.alpha`
# (dot-separated); the previous `outlier_alpha` spelling was ignored by
# ggplot2 with an "unknown parameters" warning, so outliers were drawn opaque.
ggplot(daily |> filter(kg > 0), aes(dow, kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Distribucion de kg por dia de semana (dias activos)",
    x = "", y = "Kg/dia"
  )

6. Boxplot avanzado por día de semana (dispersión + totales)

library(dplyr)
library(ggplot2)
library(scales)
## Warning: package 'scales' was built under R version 4.2.3
# 1) Keep active days only (kg > 0) and pin the weekday order, Monday first.
base <- daily |>
  filter(kg > 0) |>
  mutate(
    recolector = factor(recolector),
    dow = factor(
      dow,
      levels = c("lunes", "martes", "miércoles", "jueves", "viernes", "sábado", "domingo")
    )
  )

# 2) Vertical gap between stacked labels: 8% of the observed kg range
#    (raise the factor to 0.10 if labels end up too close together).
delta <- diff(range(base$kg, na.rm = TRUE)) * 0.08

# 3) Total kg per weekday and collector, plus the y position of each label.
#    Labels sit above the tallest theoretical upper fence (Q3 + 1.5 * IQR)
#    among the collectors of that weekday, stacked one `delta` apart.
tot_dow_rec <- base |>
  group_by(dow, recolector) |>
  summarise(
    total_kg = sum(kg, na.rm = TRUE),
    q3 = quantile(kg, 0.75, na.rm = TRUE),
    iqr = IQR(kg, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(dow, recolector) |>
  group_by(dow) |>
  mutate(
    # common reference height for every collector within the weekday
    upper_whisker = max(q3 + 1.5 * iqr, na.rm = TRUE),
    # first collector gets the highest slot, last one the lowest
    offset = (n() - row_number() + 1) * delta,
    y_lab = upper_whisker + offset,
    # label in tonnes; for kg use:
    #   lab = paste0(comma(round(total_kg, 0)), " kg")
    lab = paste0(number(total_kg / 1000, accuracy = 0.01), " t")
  ) |>
  ungroup()

# 4) Plot: dodged boxplots per collector, the raw observations jittered on
#    top, and one label per collector/weekday taken from tot_dow_rec.
ggplot(data = base, mapping = aes(x = dow, y = kg, fill = recolector)) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    outlier.alpha = 0.25
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.8,
    alpha = 0.15
  ) +
  geom_label(
    data = tot_dow_rec,
    mapping = aes(x = dow, y = y_lab, label = lab, group = recolector),
    inherit.aes = FALSE,
    position = position_dodge(width = 0.75),
    fill = "white",
    alpha = 0.85,
    size = 3.0,
    label.size = 0
  ) +
  # extra head-room at the top so the labels are not cut off
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.25))) +
  coord_cartesian(clip = "off") +
  labs(
    title = "Distribución de kg por día de semana (días activos) + dispersión real + suma anual",
    x = "",
    y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 0))

7. Heatmap calendario (total diario)

# Calendar heatmap: one tile per (day of month, month), coloured by kg_total.
daily_total |>
  mutate(
    dia = day(fecha),
    mes = month(fecha, label = TRUE, abbr = FALSE)
  ) |>
  ggplot(mapping = aes(x = dia, y = mes, fill = kg_total)) +
  geom_tile() +
  labs(
    title = "Mapa de calor (kg total por dia)",
    x = "Dia del mes",
    y = "Mes"
  )

8. Boxplots por día del mes y por mes (días activos)

# Add the day of the month to the daily series.
daily2 <- daily |>
  mutate(dia_mes = day(fecha))

# Boxplot of kg by day of month (active days only, kg > 0).
# geom_boxplot() expects `outlier.alpha`; the former `outlier_alpha` was
# ignored with an "unknown parameters" warning.
ggplot(daily2 |> filter(kg > 0), aes(x = factor(dia_mes), y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(title = "Boxplot de kg por día del mes (solo días activos)",
       x = "Día del mes", y = "Kg/día") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Rebuild daily2 with the month as a labelled factor (this replaces the
# previous daily2, so it no longer carries dia_mes).
daily2 <- daily |>
  mutate(mes = month(fecha, label = TRUE, abbr = FALSE))

# Boxplot of kg by month (active days only, kg > 0).
# geom_boxplot() expects `outlier.alpha`; the former `outlier_alpha` was
# ignored with an "unknown parameters" warning.
ggplot(daily2 |> filter(kg > 0), aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(title = "Boxplot de kg por mes (solo días activos)",
       x = "Mes", y = "Kg/día") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Boxplot of kg by month (active days only) with the raw observations on top.
# Two fixes relative to the previous version:
#   * `outlier.alpha` is the correct geom_boxplot() argument (`outlier_alpha`
#     was ignored with a warning);
#   * the points are jittered *and dodged* by collector, with the same dodge
#     width as the boxes, so each point lies over its own collector's box
#     instead of being centred on the month and straddling both boxes.
ggplot(daily2 |> filter(kg > 0), aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(position = position_dodge(width = 0.75), outlier.alpha = 0.2) +
  geom_point(
    alpha = 0.15, size = 0.8,
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75)
  ) +
  labs(title = "Kg por mes (días activos) con dispersión real",
       x = "Mes", y = "Kg/día") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

9. Boxplots adicionales (por recolector y paneles)

# Active days only (kg > 0), with day-of-month and labelled month attached.
daily_act <- daily %>%
  filter(kg > 0) %>%
  mutate(
    dia_mes = day(fecha),
    mes = month(fecha, label = TRUE, abbr = FALSE)
  )

# Boxplot of kg by day of month for active days, one box per collector.
# geom_boxplot() expects `outlier.alpha`; the former `outlier_alpha` was
# ignored with an "unknown parameters" warning.
ggplot(daily_act, aes(x = factor(dia_mes), y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Kg por día del mes (solo días activos) - por recolector",
    x = "Día del mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Boxplot of kg by month for active days, one box per collector.
# geom_boxplot() expects `outlier.alpha`; the former `outlier_alpha` was
# ignored with an "unknown parameters" warning.
ggplot(daily_act, aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Kg por mes (solo días activos) - por recolector",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Boxplot of kg by month for active days, one box per collector.
# NOTE(review): this plot specification repeats the month-by-collector boxplot
# that appears immediately before it; kept as-is pending confirmation that the
# duplicate is intentional.
# geom_boxplot() expects `outlier.alpha`; the former `outlier_alpha` was
# ignored with an "unknown parameters" warning.
ggplot(daily_act, aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(outlier.alpha = 0.3) +
  labs(
    title = "Kg por mes (solo días activos) - por recolector",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Boxplot of kg by month for active days, one panel per collector with an
# independent y axis per panel.
# geom_boxplot() expects `outlier.alpha`; the former `outlier_alpha` was
# ignored with an "unknown parameters" warning.
ggplot(daily_act, aes(x = mes, y = kg)) +
  geom_boxplot(outlier.alpha = 0.3) +
  facet_wrap(~ recolector, ncol = 1, scales = "free_y") +
  labs(
    title = "Kg por mes (solo días activos) - panel por recolector",
    x = "Mes", y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

10. Heatmaps por recolector (promedios en días activos)

library(dplyr)
library(lubridate)
library(ggplot2)

# Mean kg (and number of observations) per collector, weekday and day of
# month, computed over active days only (kg > 0). Weeks start on Monday.
hm1 <- daily %>%
  filter(kg > 0) %>%
  mutate(
    dow = wday(fecha, label = TRUE, abbr = FALSE, week_start = 1),
    dia_mes = day(fecha)
  ) %>%
  group_by(recolector, dow, dia_mes) %>%
  summarise(
    kg_prom = mean(kg),
    n = n(),
    .groups = "drop"
  )

# Heatmap of mean kg: weekday (rows) against day of month (columns),
# one panel per collector.
ggplot(data = hm1, mapping = aes(x = dia_mes, y = dow, fill = kg_prom)) +
  geom_tile() +
  facet_wrap(vars(recolector), ncol = 1) +
  scale_x_continuous(breaks = seq_len(31)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "Heatmap: día de semana vs día del mes (promedio kg, días activos)",
    x = "Día del mes",
    y = "Día de la semana",
    fill = "Kg prom"
  )

# Mean kg (and number of observations) per collector, weekday and month,
# computed over active days only (kg > 0). Weeks start on Monday.
hm2 <- daily %>%
  filter(kg > 0) %>%
  mutate(
    dow = wday(fecha, label = TRUE, abbr = FALSE, week_start = 1),
    mes = month(fecha, label = TRUE, abbr = FALSE)
  ) %>%
  group_by(recolector, dow, mes) %>%
  summarise(
    kg_prom = mean(kg),
    n = n(),
    .groups = "drop"
  )

# Heatmap of mean kg: weekday (rows) against month (columns),
# one panel per collector.
ggplot(data = hm2, mapping = aes(x = mes, y = dow, fill = kg_prom)) +
  geom_tile() +
  facet_wrap(vars(recolector), ncol = 1) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Heatmap: día de semana vs mes (promedio kg, días activos)",
    x = "Mes",
    y = "Día de la semana",
    fill = "Kg prom"
  )

11. KPIs operativos

# Operational KPIs per collector. "Active" means a day with kg > 0.
kpis <- daily %>%
  group_by(recolector) %>%
  summarise(
    # volume and activity counts
    kg_total = sum(kg),
    dias_activos = sum(kg > 0),
    dias_inactivos = sum(kg == 0),
    # daily averages: over every row of the series vs. over active days only
    kg_prom_dia_cal = mean(kg),
    kg_prom_dia_act = mean(kg[kg > 0]),
    kg_mediana_act = median(kg[kg > 0]),
    p95_act = quantile(kg[kg > 0], probs = 0.95),
    # trips
    viajes_totales = sum(viajes),
    kg_prom_viaje = sum(kg) / sum(viajes),
    .groups = "drop"
  )

kpis
## # A tibble: 2 × 10
##   recolector kg_total dias_activos dias_inactivos kg_prom_dia_cal
##   <fct>         <dbl>        <int>          <int>           <dbl>
## 1 5           2471050          198            167           6770 
## 2 8           3294000          292             73           9025.
## # ℹ 5 more variables: kg_prom_dia_act <dbl>, kg_mediana_act <dbl>,
## #   p95_act <dbl>, viajes_totales <int>, kg_prom_viaje <dbl>
library(dplyr)

# Efficiency KPIs per collector. "Active" means a day with kg > 0.
kpi_eficiencia <- daily |>
  group_by(recolector) |>
  summarise(
    # volume and share of active days
    kg_total = sum(kg),
    dias_calendario = n(),
    dias_activos = sum(kg > 0),
    pct_dias_activos = mean(kg > 0),
    # trips
    viajes_totales = sum(viajes),
    kg_por_viaje = sum(kg) / sum(viajes),
    # per-active-day averages
    kg_por_dia_activo = mean(kg[kg > 0]),
    viajes_por_dia_activo = mean(viajes[kg > 0]),
    # coefficient of variation of kg on active days
    cv_kg_activo = sd(kg[kg > 0]) / mean(kg[kg > 0]),
    # share of active days with two or more trips
    pct_doble_viaje = mean(viajes[kg > 0] >= 2),
    # median and 95th percentile of kg on active days
    p50_act = median(kg[kg > 0]),
    p95_act = quantile(kg[kg > 0], probs = 0.95),
    .groups = "drop"
  )

kpi_eficiencia
## # A tibble: 2 × 13
##   recolector kg_total dias_calendario dias_activos pct_dias_activos
##   <fct>         <dbl>           <int>        <int>            <dbl>
## 1 5           2471050             365          198            0.542
## 2 8           3294000             365          292            0.8  
## # ℹ 8 more variables: viajes_totales <int>, kg_por_viaje <dbl>,
## #   kg_por_dia_activo <dbl>, viajes_por_dia_activo <dbl>, cv_kg_activo <dbl>,
## #   pct_doble_viaje <dbl>, p50_act <dbl>, p95_act <dbl>

12. Comparación entre recolectores (datos en formato ancho + Wilcoxon pareado)

library(dplyr)
library(tidyr)

# One row per date with one kg column per collector; a date/collector
# combination that is absent from `daily` is filled with 0.
wide <- daily %>%
  mutate(recolector = as.character(recolector)) %>%
  select(fecha, recolector, kg) %>%
  pivot_wider(
    names_from = recolector,
    values_from = kg,
    values_fill = 0
  ) %>%
  rename(kg_r5 = "5", kg_r8 = "8")

# Days on which both collectors were active, with the paired comparison
# measures: raw difference (R5 - R8), mean of the pair, and the log-ratio
# of the two values after adding 1 to each.
both_days <- wide %>%
  filter(kg_r5 > 0, kg_r8 > 0) %>%
  mutate(
    dif_kg = kg_r5 - kg_r8,
    mean_kg = (kg_r5 + kg_r8) / 2,
    log_ratio = log((kg_r5 + 1) / (kg_r8 + 1))
  )

# Number of shared active days
both_days %>%
  summarise(n_dias_compartidos = n())
## # A tibble: 1 × 1
##   n_dias_compartidos
##                <int>
## 1                164
# Paired Wilcoxon signed-rank test comparing kg_r5 against kg_r8 on the days
# where both collectors were active (both_days).
w_test <- wilcox.test(
  both_days$kg_r5,
  both_days$kg_r8,
  paired = TRUE,
  conf.int = TRUE,     # also report the (pseudo)median and its confidence interval
  conf.level = 0.95,
  exact = FALSE        # do not compute an exact p-value
)

# Print the test result
w_test
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  both_days$kg_r5 and both_days$kg_r8
## V = 9668.5, p-value = 1.875e-06
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##   765 1645
## sample estimates:
## (pseudo)median 
##           1245

13. Resumen del efecto (diferencia diaria)

# Effect-size summary of the daily difference (R5 - R8) on shared days:
# sample size, median, spread, quartiles and share of days where R5 > R8.
resumen_efecto <- both_days %>%
  summarise(
    n = n(),
    mediana_dif = median(dif_kg),
    iqr_dif = IQR(dif_kg),
    p25 = quantile(dif_kg, probs = 0.25),
    p75 = quantile(dif_kg, probs = 0.75),
    prop_r5_mayor = mean(dif_kg > 0)
  )

resumen_efecto
## # A tibble: 1 × 6
##       n mediana_dif iqr_dif   p25   p75 prop_r5_mayor
##   <int>       <dbl>   <dbl> <dbl> <dbl>         <dbl>
## 1   164        1355   3558.  -645 2912.         0.689

14. Diferencia vs fecha y Bland–Altman

library(ggplot2)

# Daily difference (R5 - R8) over time, with a zero reference line.
ggplot(data = both_days, mapping = aes(x = fecha, y = dif_kg)) +
  geom_hline(yintercept = 0) +
  geom_line(linewidth = 0.6) +
  labs(
    title = "Diferencia diaria (R5 - R8) en días compartidos",
    x = "Fecha",
    y = "Diferencia (kg)"
  )

# Difference (R5 - R8) against the mean of the pair, with a zero reference line.
ggplot(data = both_days, mapping = aes(x = mean_kg, y = dif_kg)) +
  geom_hline(yintercept = 0) +
  geom_point(alpha = 0.4) +
  labs(
    title = "Bland-Altman: Diferencia vs Promedio del día",
    x = "Promedio del día (kg)",
    y = "Diferencia (kg)"
  )

15. Pruebas adicionales (log-ratio) y modelo lineal con calendario

# One-sample Wilcoxon signed-rank test of the per-day log-ratio against 0
# (mu = 0), with a confidence interval and without an exact p-value.
w_log <- wilcox.test(both_days$log_ratio, mu = 0, conf.int = TRUE, exact = FALSE)
# Print the test result
w_log
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  both_days$log_ratio
## V = 9672, p-value = 1.823e-06
## alternative hypothesis: true location is not equal to 0
## 95 percent confidence interval:
##  0.06981843 0.15096447
## sample estimates:
## (pseudo)median 
##       0.113024
library(lubridate)

# Attach calendar covariates to the shared days: labelled month and
# labelled weekday (weeks starting on Monday).
both_days_cal <- both_days |>
  mutate(
    mes = month(fecha, label = TRUE, abbr = FALSE),
    dow = wday(fecha, label = TRUE, abbr = FALSE, week_start = 1)
  )

# Linear model of the daily difference (dif_kg) on month and weekday.
# NOTE(review): the fitted coefficients are labelled .L / .Q / .C / ^k, i.e.
# polynomial contrasts, which indicates mes and dow enter the model as ordered
# factors. If per-month / per-weekday effects relative to a baseline are the
# goal, converting them with factor(..., ordered = FALSE) before fitting would
# give treatment contrasts instead — confirm the intended interpretation.
m_cal <- lm(dif_kg ~ mes + dow, data = both_days_cal)
summary(m_cal)
## 
## Call:
## lm(formula = dif_kg ~ mes + dow, data = both_days_cal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13417.4  -1644.4    131.1   2111.4   7944.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   660.63     425.73   1.552 0.122884    
## mes.L        -279.31    1488.54  -0.188 0.851421    
## mes.Q       -1552.04    1320.22  -1.176 0.241671    
## mes.C       -3372.00    2028.41  -1.662 0.098582 .  
## mes^4       -4504.00    1176.68  -3.828 0.000191 ***
## mes^5       -4700.66    1614.43  -2.912 0.004160 ** 
## mes^6       -2847.13    2053.22  -1.387 0.167657    
## mes^7        -521.90    1140.84  -0.457 0.648015    
## mes^8        1751.92    1624.91   1.078 0.282738    
## mes^9        1428.21    2376.60   0.601 0.548806    
## mes^10       -678.11    2037.95  -0.333 0.739809    
## mes^11      -1095.85    1179.48  -0.929 0.354372    
## dow.L         394.33    2096.59   0.188 0.851075    
## dow.Q       -1345.02    1999.56  -0.673 0.502229    
## dow.C         517.56    1588.31   0.326 0.744998    
## dow^4       -1086.42    1077.41  -1.008 0.314950    
## dow^5         -91.58     750.42  -0.122 0.903034    
## dow^6       -1091.61     647.30  -1.686 0.093854 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3331 on 146 degrees of freedom
## Multiple R-squared:  0.3261, Adjusted R-squared:  0.2476 
## F-statistic: 4.156 on 17 and 146 DF,  p-value: 8.83e-07

16. Boxplot mensual con totales por recolector (etiquetas)

library(dplyr)
library(ggplot2)
library(scales)

# Active days only (kg > 0); relies on daily2 carrying the `mes` column.
base <- filter(daily2, kg > 0)

# Vertical gap between stacked labels: 6% of the observed kg range.
rango <- diff(range(base$kg, na.rm = TRUE))
delta <- 0.06 * rango

# Monthly total per collector, plus the label position: above the month's
# maximum kg, with collector 5 two gaps up and any other collector one gap up.
tot_mensual_rec <- base |>
  group_by(mes, recolector) |>
  summarise(total_kg = sum(kg, na.rm = TRUE), .groups = "drop") |>
  left_join(
    base |>
      group_by(mes) |>
      summarise(ymax = max(kg, na.rm = TRUE), .groups = "drop"),
    by = "mes"
  ) |>
  mutate(
    y_lab = ymax + delta * ifelse(recolector == 5, 2, 1),
    lab = paste0(number(total_kg / 1000, accuracy = 0.01), " t")  # tonnes
  )

# Monthly boxplots per collector, raw observations jittered on top, and the
# monthly total per collector as a label above each month.
ggplot(data = base, mapping = aes(x = mes, y = kg, fill = recolector)) +
  geom_boxplot(
    outlier.alpha = 0.2,
    position = position_dodge(width = 0.75)
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.8,
    alpha = 0.15
  ) +
  geom_label(
    data = tot_mensual_rec,
    mapping = aes(x = mes, y = y_lab, label = lab, group = recolector),
    inherit.aes = FALSE,
    position = position_dodge(width = 0.75),
    fill = "white",
    alpha = 0.80,
    size = 3.2,
    label.size = 0
  ) +
  # extra room above (labels) and on both sides of the discrete axis
  scale_x_discrete(expand = expansion(add = 0.6)) +
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.30))) +
  coord_cartesian(clip = "off") +
  labs(
    title = "Kg por mes (días activos) con dispersión real + total mensual (por recolector)",
    x = "Mes",
    y = "Kg/día"
  ) +
  theme(
    plot.margin = margin(10, 10, 10, 30),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

17. Boxplot por día del mes con totales por recolector (etiquetas)

library(dplyr)
library(ggplot2)
library(lubridate)
library(scales)

# Active days only (kg > 0), with day of month as a factor over 1..31 and
# the collector as a factor. This replaces any earlier daily2.
daily2 <- daily |>
  filter(kg > 0) |>
  mutate(
    dia_mes = factor(day(fecha), levels = 1:31),
    recolector = factor(recolector)
  )

# Vertical gap between stacked labels: 6% of the observed kg range.
delta <- diff(range(daily2$kg, na.rm = TRUE)) * 0.06

# Total kg per day of month and collector, plus the y position of each label.
# Labels sit above the tallest theoretical upper fence (Q3 + 1.5 * IQR)
# among the collectors of that day, stacked one `delta` apart.
tot_dia_mes_rec <- daily2 |>
  group_by(dia_mes, recolector) |>
  summarise(
    total_kg = sum(kg, na.rm = TRUE),
    q3 = quantile(kg, 0.75, na.rm = TRUE),
    iqr = IQR(kg, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(dia_mes, recolector) |>
  group_by(dia_mes) |>
  mutate(
    # common reference height for every collector within the day of month
    upper_whisker = max(q3 + 1.5 * iqr, na.rm = TRUE),
    # first collector gets the highest slot, last one the lowest
    offset = (n() - row_number() + 1) * delta,
    y_lab = upper_whisker + offset,
    lab = paste0(scales::number(total_kg / 1000, accuracy = 0.01), " t")  # tonnes
  ) |>
  ungroup()

# Day-of-month boxplots per collector, raw observations jittered on top, and
# the total per day of month and collector as a label above each day.
ggplot(data = daily2, mapping = aes(x = dia_mes, y = kg, fill = recolector)) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    outlier.alpha = 0.25
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.7,
    alpha = 0.15
  ) +
  geom_label(
    data = tot_dia_mes_rec,
    mapping = aes(x = dia_mes, y = y_lab, label = lab, group = recolector),
    inherit.aes = FALSE,
    position = position_dodge(width = 0.75),
    fill = "white",
    alpha = 0.85,
    size = 2.5,
    label.size = 0
  ) +
  # extra head-room at the top so the labels are not cut off
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.30))) +
  coord_cartesian(clip = "off") +
  labs(
    title = "Boxplot de kg por día del mes (solo días activos) + dispersión + total por día (por recolector)",
    x = "Día del mes",
    y = "Kg/día"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

18. Boxplot por día del mes (dispersión real)

# Day-of-month boxplots per collector (active days only) with the raw
# observations jittered and dodged on top of their own box.
ggplot(
  data = daily2 |> filter(kg > 0),
  mapping = aes(x = factor(dia_mes), y = kg, fill = recolector)
) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    outlier.alpha = 0.25
  ) +
  geom_point(
    position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
    size = 0.7,
    alpha = 0.15
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "Boxplot de kg por día del mes (solo días activos) + dispersión real",
    x = "Día del mes",
    y = "Kg/día"
  )