1. Limpieza y exploración inicial

df <- read_excel("Historical Product Demand.xlsx") %>%
  clean_names()

df$date <- as.Date(df$date)
str(df)
## tibble [1,715 × 5] (S3: tbl_df/tbl/data.frame)
##  $ product_code    : chr [1:1715] "Product_0002" "Product_0002" "Product_0002" "Product_0002" ...
##  $ warehouse       : chr [1:1715] "Whse_S" "Whse_S" "Whse_S" "Whse_S" ...
##  $ product_category: chr [1:1715] "Category_005" "Category_005" "Category_005" "Category_005" ...
##  $ date            : Date[1:1715], format: "2012-03-01" "2012-05-02" ...
##  $ order_demand    : num [1:1715] 50000 50000 30000 10000 30000 50000 15000 20000 25000 30000 ...
desc_prod <- df %>%
  group_by(product_code) %>%
  summarise(
    media_demanda = mean(order_demand, na.rm = TRUE),
    sd_demanda = sd(order_demand, na.rm = TRUE),
    mediana = median(order_demand, na.rm = TRUE)
  )
desc_prod
## # A tibble: 4 × 4
##   product_code media_demanda sd_demanda mediana
##   <chr>                <dbl>      <dbl>   <dbl>
## 1 Product_0001          758.      1026.     300
## 2 Product_0002        19458.     35296.   10000
## 3 Product_0003          355.       412.     300
## 4 Product_0004          369.       557.     300

2. Transformación de datos por mes

df_mes <- df %>%
  mutate(mes = floor_date(date, "month")) %>%
  group_by(mes, product_code) %>%
  summarise(demanda_mensual = sum(order_demand, na.rm = TRUE))
## `summarise()` has grouped output by 'mes'. You can override using the `.groups`
## argument.
df_mes
## # A tibble: 239 × 3
## # Groups:   mes [61]
##    mes        product_code demanda_mensual
##    <date>     <chr>                  <dbl>
##  1 2011-12-01 Product_0001             300
##  2 2011-12-01 Product_0002          221000
##  3 2012-01-01 Product_0001            9700
##  4 2012-01-01 Product_0002           65000
##  5 2012-01-01 Product_0003             400
##  6 2012-01-01 Product_0004             300
##  7 2012-02-01 Product_0001           13000
##  8 2012-02-01 Product_0002           19000
##  9 2012-02-01 Product_0003            1200
## 10 2012-02-01 Product_0004            1000
## # ℹ 229 more rows

3. Visualización

g1 <- df_mes %>%
  filter(product_code == "Product_0002") %>%
  ggplot(aes(x = mes, y = demanda_mensual)) +
  geom_line(color = "blue", linewidth = 1.2) +
  geom_point(color = "darkblue", size = 2) +
  theme_minimal() +
  labs(
    title = "Demanda mensual del Producto 0002",
    x = "Mes",
    y = "Demanda"
  )
g1

4. ANOVA de demanda por warehouse para Product_0002

df_prod2 <- df %>% 
  filter(product_code == "Product_0002")

df_prod2 %>% summarise(registros = n())
## # A tibble: 1 × 1
##   registros
##       <int>
## 1       450
ggplot(df_prod2, aes(x = warehouse, y = order_demand, fill = warehouse)) +
  geom_boxplot(alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Demanda por almacén - Product_0002",
    x = "Almacén",
    y = "Demanda"
  )

anova_prod2 <- aov(order_demand ~ warehouse, data = df_prod2)
summary(anova_prod2)
##              Df    Sum Sq   Mean Sq F value   Pr(>F)    
## warehouse     1 3.137e+10 3.137e+10   26.62 3.73e-07 ***
## Residuals   448 5.280e+11 1.179e+09                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_prod2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = order_demand ~ warehouse, data = df_prod2)
## 
## $warehouse
##                   diff      lwr      upr p adj
## Whse_S-Whse_C 21427.89 13265.32 29590.45 4e-07

Al resultar un valor p del análisis de varianza de \(3.73\times 10^{-7}\), este número es demasiado pequeño para rechazar la hipótesis nula, la cuál supone que, en efecto, las diferencias en la demanda entre cada almacén son significativas y no se deben al azar.