Introducción

PASO 1. Cargar librerias

# Cargar librerías necesarias
library(readxl)
library(dplyr)
library(ggplot2)
library(plotly)
library(knitr)
library(kableExtra)
library(tidyr)
library(tidyverse)

PASO 2: Cargar la base de datos

# Cargar datos desde el archivo Excel
datadidas <- read_excel("C:/Users/daniel.medina/Documents/maestria/Analisis Financiero/taller 1/DatosCaso1.xlsx")

para observar las variables y las primeras filas de la base de datos

# Ver las primeras filas de los datos
class(datadidas)  
## [1] "tbl_df"     "tbl"        "data.frame"
colnames(datadidas)  #listar variables
##  [1] "distribuidor"       "region"             "estado"            
##  [4] "ciudad"             "producto"           "precio_unidad"     
##  [7] "unidades_vendidas"  "ventas_total"       "utilidad_operativa"
## [10] "margen_operativo"   "metodo_venta"
head(datadidas)  # primeras lineas de la base de datos
distribuidor region estado ciudad producto precio_unidad unidades_vendidas ventas_total utilidad_operativa margen_operativo metodo_venta
Foot Locker Northeast New York New York Men’s Street Footwear 50 1200 60000 30000.0 0.50 In-store
Foot Locker Northeast New York New York Men’s Athletic Footwear 50 1000 50000 15000.0 0.30 In-store
Foot Locker Northeast New York New York Women’s Street Footwear 40 1000 40000 14000.0 0.35 In-store
Foot Locker Northeast New York New York Women’s Athletic Footwear 45 850 38250 13387.5 0.35 In-store
Foot Locker Northeast New York New York Men’s Apparel 60 900 54000 16200.0 0.30 In-store
Foot Locker Northeast New York New York Women’s Apparel 50 1000 50000 12500.0 0.25 In-store
# Ver estructura
str(datadidas)
## tibble [9,648 × 11] (S3: tbl_df/tbl/data.frame)
##  $ distribuidor      : chr [1:9648] "Foot Locker" "Foot Locker" "Foot Locker" "Foot Locker" ...
##  $ region            : chr [1:9648] "Northeast" "Northeast" "Northeast" "Northeast" ...
##  $ estado            : chr [1:9648] "New York" "New York" "New York" "New York" ...
##  $ ciudad            : chr [1:9648] "New York" "New York" "New York" "New York" ...
##  $ producto          : chr [1:9648] "Men's Street Footwear" "Men's Athletic Footwear" "Women's Street Footwear" "Women's Athletic Footwear" ...
##  $ precio_unidad     : num [1:9648] 50 50 40 45 60 50 50 50 40 45 ...
##  $ unidades_vendidas : num [1:9648] 1200 1000 1000 850 900 1000 1250 900 950 825 ...
##  $ ventas_total      : num [1:9648] 60000 50000 40000 38250 54000 ...
##  $ utilidad_operativa: num [1:9648] 30000 15000 14000 13388 16200 ...
##  $ margen_operativo  : num [1:9648] 0.5 0.3 0.35 0.35 0.3 0.25 0.5 0.3 0.35 0.35 ...
##  $ metodo_venta      : chr [1:9648] "In-store" "In-store" "In-store" "In-store" ...
# Ver primeras filas
head(datadidas)
distribuidor region estado ciudad producto precio_unidad unidades_vendidas ventas_total utilidad_operativa margen_operativo metodo_venta
Foot Locker Northeast New York New York Men’s Street Footwear 50 1200 60000 30000.0 0.50 In-store
Foot Locker Northeast New York New York Men’s Athletic Footwear 50 1000 50000 15000.0 0.30 In-store
Foot Locker Northeast New York New York Women’s Street Footwear 40 1000 40000 14000.0 0.35 In-store
Foot Locker Northeast New York New York Women’s Athletic Footwear 45 850 38250 13387.5 0.35 In-store
Foot Locker Northeast New York New York Men’s Apparel 60 900 54000 16200.0 0.30 In-store
Foot Locker Northeast New York New York Women’s Apparel 50 1000 50000 12500.0 0.25 In-store
# Resumen general
summary(datadidas)
##  distribuidor          region             estado             ciudad         
##  Length:9648        Length:9648        Length:9648        Length:9648       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    producto         precio_unidad    unidades_vendidas  ventas_total  
##  Length:9648        Min.   :  7.00   Min.   :   0.0    Min.   :    0  
##  Class :character   1st Qu.: 35.00   1st Qu.: 106.0    1st Qu.: 4065  
##  Mode  :character   Median : 45.00   Median : 176.0    Median : 7804  
##                     Mean   : 45.22   Mean   : 256.9    Mean   :12455  
##                     3rd Qu.: 55.00   3rd Qu.: 350.0    3rd Qu.:15864  
##                     Max.   :110.00   Max.   :1275.0    Max.   :82500  
##  utilidad_operativa margen_operativo metodo_venta      
##  Min.   :    0      Min.   :0.100    Length:9648       
##  1st Qu.: 1753      1st Qu.:0.350    Class :character  
##  Median : 3263      Median :0.410    Mode  :character  
##  Mean   : 4895      Mean   :0.423                      
##  3rd Qu.: 6192      3rd Qu.:0.490                      
##  Max.   :39000      Max.   :0.800

Análisis del desempeño de ventas

Esta sección presenta el comportamiento de las ventas totales y por producto según la región, referencia del producto, distribuidor y método de venta, permitiendo identificar los mercados de mayor relevancia estratégica.

lo mismo de arriba pero con otro diseño

library(dplyr)
library(tidyr)
library(scales)
library(kableExtra)

margen_stats <- datadidas %>%
  summarise(
    Ventas_Promedio  = mean(ventas_total, na.rm = TRUE),
    Ventas_Mediana   = median(ventas_total, na.rm = TRUE),
    Ventas_Mínimo    = min(ventas_total, na.rm = TRUE),
    Ventas_Máximo    = max(ventas_total, na.rm = TRUE),
    Ventas_DesvEst   = sd(ventas_total, na.rm = TRUE),

    Unidades_Promedio = mean(unidades_vendidas, na.rm = TRUE),
    Unidades_Mediana  = median(unidades_vendidas, na.rm = TRUE),
    Unidades_Mínimo   = min(unidades_vendidas, na.rm = TRUE),
    Unidades_Máximo   = max(unidades_vendidas, na.rm = TRUE),
    Unidades_DesvEst  = sd(unidades_vendidas, na.rm = TRUE)
  ) %>%
  pivot_longer(everything(),
               names_to = c("Variable", "Indicador"),
               names_sep = "_",
               values_to = "Valor") %>%
  pivot_wider(names_from = Variable, values_from = Valor) %>%
  mutate(
    Ventas = scales::dollar(Ventas),
    Unidades = scales::number(Unidades)
  )

kable(margen_stats,
      caption = " Ventas vs Unidades",
      booktabs = TRUE,
      align = "c") %>%
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE, background = "#2C3E50", color = "white") %>%
  column_spec(2, bold = TRUE, color = "green") %>%
  column_spec(3, bold = TRUE, color = "blue")
Ventas vs Unidades
Indicador Ventas Unidades
Promedio $12,455.08 257
Mediana $7,803.50 176
Mínimo $0.00 0
Máximo $82,500.00 1 275
DesvEst $12,716.39 214
ventas_ciudad <- datadidas %>%
  group_by(Ciudad = ciudad) %>%
  summarise(
    Ventas = sum(ventas_total, na.rm = TRUE),
    Unidades = sum(unidades_vendidas, na.rm = TRUE),
    Registros = n()
  ) %>%
  arrange(desc(Ventas)) %>%
  mutate(
    Participacion = Ventas / sum(Ventas),
    Ranking = row_number()
  )
library(kableExtra)

ventas_ciudad %>%
  mutate(
    Ventas = scales::dollar(Ventas),
    Unidades = scales::comma(Unidades),
    Participacion = scales::percent(Participacion)
  ) %>%
  kable(
    caption = "📊 Dashboard de Ventas por Ciudad",
    align = "c",
    booktabs = TRUE
  ) %>%
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE, background = "#1F4E79", color = "white") %>%
  column_spec(2, color = "green", bold = TRUE) %>%
  column_spec(5, color = "blue") %>%
  column_spec(6, bold = TRUE)
📊 Dashboard de Ventas por Ciudad
Ciudad Ventas Unidades Registros Participacion Ranking
New York $5,676,160 111,954 216 4.72357% 1
San Francisco $4,929,220 86,900 216 4.10199% 2
Charleston $4,904,272 102,483 288 4.08123% 3
Portland $4,176,777 72,946 360 3.47582% 4
Orlando $3,946,476 60,295 216 3.28417% 5
Miami $3,874,113 73,135 144 3.22395% 6
Los Angeles $3,651,288 76,384 216 3.03852% 7
Houston $3,629,632 90,322 216 3.02050% 8
New Orleans $3,377,031 57,615 216 2.81029% 9
Seattle $3,222,093 46,611 144 2.68135% 10
Richmond $3,074,415 52,969 216 2.55846% 11
Albany $2,994,304 57,186 144 2.49179% 12
Dallas $2,982,739 69,178 216 2.48217% 13
Las Vegas $2,981,134 51,831 216 2.48083% 14
Charlotte $2,936,581 62,936 144 2.44376% 15
Albuquerque $2,824,641 52,633 216 2.35060% 16
Boise $2,742,753 63,827 216 2.28246% 17
Honolulu $2,734,457 40,375 144 2.27555% 18
Atlanta $2,708,591 56,391 216 2.25403% 19
Denver $2,569,036 41,378 144 2.13789% 20
Knoxville $2,567,190 66,077 216 2.13636% 21
Birmingham $2,513,424 63,327 216 2.09162% 22
Manchester $2,339,267 40,812 216 1.94669% 23
Detroit $2,287,283 50,095 144 1.90343% 24
Cheyenne $2,282,342 50,228 144 1.89931% 25
Columbus $2,269,283 47,781 144 1.88845% 26
Phoenix $2,254,096 46,919 216 1.87581% 27
Jackson $2,218,609 56,814 216 1.84628% 28
Burlington $2,041,598 38,685 216 1.69897% 29
Billings $1,930,761 42,713 144 1.60674% 30
Anchorage $1,810,428 30,815 144 1.50660% 31
Little Rock $1,802,672 48,468 216 1.50014% 32
Hartford $1,646,448 34,696 216 1.37014% 33
Boston $1,578,435 32,895 216 1.31354% 34
Oklahoma City $1,512,059 40,459 216 1.25830% 35
Wilmington $1,508,537 30,275 144 1.25537% 36
Philadelphia $1,478,794 27,662 216 1.23062% 37
Salt Lake City $1,387,620 48,548 216 1.15475% 38
Louisville $1,241,148 28,664 144 1.03286% 39
Wichita $1,225,314 29,463 144 1.01968% 40
Newark $1,220,446 26,540 144 1.01563% 41
Chicago $1,204,063 25,407 144 1.00199% 42
Providence $1,202,256 27,473 216 1.00049% 43
St. Louis $1,189,515 36,404 144 0.98989% 44
Indianapolis $1,084,723 26,332 144 0.90268% 45
Sioux Falls $1,041,101 22,973 144 0.86638% 46
Baltimore $951,134 20,818 144 0.79151% 47
Fargo $950,930 22,781 144 0.79134% 48
Milwaukee $948,894 23,950 144 0.78965% 49
Des Moines $909,811 23,446 144 0.75712% 50
Minneapolis $903,918 20,838 144 0.75222% 51
Omaha $728,838 19,154 144 0.60652% 52
library(dplyr)
library(ggplot2)
library(sf)
coordenadas <- data.frame(
  Ciudad = c(
    "New York","San Francisco","Charleston","Portland","Orlando","Miami",
    "Los Angeles","Houston","New Orleans","Seattle","Richmond","Albany",
    "Dallas","Las Vegas","Charlotte","Albuquerque","Boise","Honolulu",
    "Atlanta","Denver","Knoxville","Birmingham","Manchester","Detroit",
    "Cheyenne","Columbus","Phoenix","Jackson","Burlington","Billings",
    "Anchorage","Little Rock","Hartford","Boston","Oklahoma City",
    "Wilmington","Philadelphia","Salt Lake City","Louisville","Wichita",
    "Newark","Chicago","Providence","St. Louis","Indianapolis",
    "Sioux Falls","Baltimore",
    "Fargo","Milwaukee","Des Moines","Minneapolis","Omaha"
  ),
  lat = c(
    40.7128,37.7749,32.7765,45.5152,28.5383,25.7617,
    34.0522,29.7604,29.9511,47.6062,37.5407,42.6526,
    32.7767,36.1699,35.2271,35.0844,43.6150,21.3069,
    33.7490,39.7392,35.9606,33.5186,42.9956,42.3314,
    41.1400,39.9612,33.4484,32.2988,44.4759,45.7833,
    61.2181,34.7465,41.7658,42.3601,35.4676,
    34.2257,39.9526,40.7608,38.2527,37.6872,
    40.7357,41.8781,41.8240,38.6270,39.7684,
    43.5446,39.2904,
    46.8772,43.0389,41.5868,44.9778,41.2565
  ),
  lon = c(
    -74.0060,-122.4194,-79.9311,-122.6784,-81.3792,-80.1918,
    -118.2437,-95.3698,-90.0715,-122.3321,-77.4360,-73.7562,
    -96.7970,-115.1398,-80.8431,-106.6504,-116.2023,-157.8583,
    -84.3880,-104.9903,-83.9207,-86.8104,-71.4548,-83.0458,
    -104.8202,-82.9988,-112.0740,-90.1848,-73.2121,-108.5007,
    -149.9003,-92.2896,-72.6734,-71.0589,-97.5164,
    -77.9447,-75.1652,-111.8910,-85.7585,-97.3301,
    -74.1724,-87.6298,-71.4128,-90.1994,-86.1581,
    -96.7311,-76.6122,
    -96.7898,-87.9065,-93.6250,-93.2650,-95.9345
  )
)

“Se optó por una visualización simplificada eliminando elementos innecesarios como ejes y reduciendo el ruido visual, con el fin de resaltar las ciudades de mayor contribución. La representación en millones facilita la interpretación comparativa y permite identificar claramente la concentración del mercado.”

ANALISIS POR REGIONES

library(dplyr)
library(ggplot2)
library(scales)

ggplot(datadidas %>%
         group_by(region) %>%
         summarise(ventas = sum(ventas_total, na.rm = TRUE)) %>%
         arrange(desc(ventas)) %>%
         mutate(
           porcentaje = ventas / sum(ventas),
           region = reorder(region, ventas)
         ),
       aes(x = region, y = ventas, fill = region)) +
  
  geom_col(width = 0.6) +
  
  geom_text(aes(label = paste0(
    dollar(ventas, scale = 1e-6, suffix = "M"),
    " | ", percent(porcentaje, accuracy = 0.1)
  )),
  hjust = -0.1,
  size = 4,
  fontface = "bold") +
  
  coord_flip() +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.25))
  ) +
  
  # 🎨 COLORES CON LÓGICA GEOGRÁFICA
  scale_fill_manual(values = c(
    "West" = "#2C7FB8",       # azul
    "South" = "#D7191C",      # rojo
    "Midwest" = "#1A9641",    # verde
    "Northeast" = "#FDAE61"   # dorado
  )) +
  
  labs(
    title = "Concentración de ventas por región",
    subtitle = "Análisis comparativo de participación (%) y volumen (millones USD)",
    x = NULL,
    y = "Ventas (Millones USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.background = element_rect(fill = "white", color = NA),
    legend.position = "none" # opcional (puedes poner "right")
  )

library(dplyr)
library(ggplot2)
library(scales)
library(stringr)

# 📊 Preparación de datos
datos_plot <- datadidas %>%
  group_by(producto) %>%
  summarise(
    ventas = sum(ventas_total, na.rm = TRUE)
  ) %>%
  arrange(desc(ventas)) %>%
  mutate(
    porcentaje = ventas / sum(ventas),
    
    # 🔥 Clasificación corregida
    categoria = case_when(
      str_detect(producto, regex("Shoes|Sneaker|Footwear", ignore_case = TRUE)) ~ "Calzado",
      str_detect(producto, regex("Shirt|Jacket|Hoodie|Shorts|Pants|Apparel", ignore_case = TRUE)) ~ "Ropa",
      TRUE ~ "Accesorios"
    ),
    
    # 🔄 Orden para gráfico
    producto = reorder(producto, ventas)
  )

# 📈 Gráfico final
ggplot(datos_plot, aes(x = producto, y = ventas, fill = categoria)) +
  
  geom_col(width = 0.6) +
  
  # 🧾 Etiquetas (valor + %)
  geom_text(
    aes(label = ifelse(
      is.na(porcentaje),
      dollar(ventas, scale = 1e-6, suffix = "M"),
      paste0(
        dollar(ventas, scale = 1e-6, suffix = "M"),
        " | ",
        percent(porcentaje, accuracy = 0.1)
      )
    )),
    hjust = -0.2,
    size = 4,
    fontface = "bold"
  ) +
  
  # 🔥 SOLUCIÓN AL CORTE
  coord_flip(clip = "off") +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.35))  # 👈 más espacio para etiquetas
  ) +
  
  # 🎨 Colores estratégicos
  scale_fill_manual(values = c(
    "Calzado" = "#2C7FB8",
    "Ropa" = "#1A9641",
    "Accesorios" = "#FDAE61"
  )) +
  
  labs(
    title = "Distribución de ventas por línea y categoría de producto",
    subtitle = "Clasificación estratégica: Calzado, Ropa y Accesorios",
    x = NULL,
    y = "Ventas (Millones USD)",
    fill = "Categoría"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    legend.position = "top",
    
    # 👇 CLAVE para evitar cortes visuales
    plot.margin = margin(10, 50, 10, 10),
    
    plot.background = element_rect(fill = "white", color = NA)
  )

library(dplyr)
library(ggplot2)
library(scales)

# 📊 Preparación de datos
datos_retail <- datadidas %>%
  group_by(distribuidor) %>%
  summarise(
    ventas = sum(ventas_total, na.rm = TRUE)
  ) %>%
  arrange(desc(ventas)) %>%
  mutate(
    participacion = ventas / sum(ventas),
    
    # 🔥 Color único por retailer
    color_marca = case_when(
      distribuidor == "Amazon" ~ "#FF9900",        # naranja Amazon
      distribuidor == "Walmart" ~ "#0071CE",       # azul Walmart
      distribuidor == "Foot Locker" ~ "#000000",   # negro Foot Locker
      distribuidor == "West Gear" ~ "#6A1B9A",     # morado (distintivo)
      distribuidor == "Sports Direct" ~ "#E41A1C", # rojo fuerte (deporte)
      distribuidor == "Kohl's" ~ "#4B2E83",        # morado oscuro retail
      TRUE ~ "#999999"  # fallback (por seguridad)
    ),
    
    distribuidor = reorder(distribuidor, ventas)
  )

# 📈 Gráfico
ggplot(datos_retail, aes(x = distribuidor, y = ventas, fill = color_marca)) +
  
  geom_col(width = 0.6) +
  
  geom_text(
    aes(label = paste0(
      dollar(ventas, scale = 1e-6, suffix = "M"),
      " | ",
      percent(participacion, accuracy = 0.1)
    )),
    hjust = -0.2,
    size = 4,
    fontface = "bold"
  ) +
  
  coord_flip(clip = "off") +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.35))
  ) +
  
  # 🎨 Colores exactos definidos manualmente
  scale_fill_identity() +
  
  labs(
    title = "Distribución de ventas por retailer",
    subtitle = "Asignación de colores por identidad de marca",
    x = NULL,
    y = "Ventas (Millones USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    plot.margin = margin(10, 50, 10, 10),
    plot.background = element_rect(fill = "white", color = NA)
  )

library(dplyr)
library(ggplot2)
library(scales)
library(stringr)

# 📊 Preparación de datos
datos_metodo <- datadidas %>%
  group_by(metodo_venta) %>%
  summarise(
    ventas = sum(ventas_total, na.rm = TRUE)
  ) %>%
  arrange(desc(ventas)) %>%
  mutate(
    participacion = ventas / sum(ventas),
    
    # 🔥 Colores estratégicos por método de venta
    color_metodo = case_when(
      str_detect(metodo_venta, regex("Online|Web|E-commerce", ignore_case = TRUE)) ~ "#2C7FB8",  # azul digital
      str_detect(metodo_venta, regex("Store|In-store|Retail", ignore_case = TRUE)) ~ "#1A9641",  # verde físico
      str_detect(metodo_venta, regex("Outlet", ignore_case = TRUE)) ~ "#FDAE61",                # naranja outlet
      TRUE ~ "#7B3294"  # otros métodos (morado distintivo)
    ),
    
    metodo_venta = reorder(metodo_venta, ventas)
  )

# 📈 Gráfico
ggplot(datos_metodo, aes(x = metodo_venta, y = ventas, fill = color_metodo)) +
  
  geom_col(width = 0.6) +
  
  # 🧾 Etiquetas consistentes
  geom_text(
    aes(label = paste0(
      dollar(ventas, scale = 1e-6, suffix = "M"),
      " | ",
      percent(participacion, accuracy = 0.1)
    )),
    hjust = -0.2,
    size = 4,
    fontface = "bold"
  ) +
  
  # 🔥 Evita cortes
  coord_flip(clip = "off") +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.35))
  ) +
  
  # 🎨 Colores definidos manualmente
  scale_fill_identity() +
  
  labs(
    title = "Distribución de ventas por método de venta",
    subtitle = "Comparación entre canales de comercialización",
    x = NULL,
    y = "Ventas (Millones USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    plot.margin = margin(10, 50, 10, 10),
    plot.background = element_rect(fill = "white", color = NA)
  )

Análisis de Rentabilidad

El margen operativo mide qué proporción de las ventas se convierte en utilidad, independientemente del volumen vendido. Es el indicador más relevante para evaluar la eficiencia y sostenibilidad del negocio.

library(dplyr)
library(tidyr)
library(scales)
library(kableExtra)

# 📊 Tabla de margen tipo dashboard
tabla_margen <- datadidas %>%
  summarise(
    Margen_Promedio = mean(margen_operativo, na.rm = TRUE),
    Margen_Mediana  = median(margen_operativo, na.rm = TRUE),
    Margen_Minimo   = min(margen_operativo, na.rm = TRUE),
    Margen_Maximo   = max(margen_operativo, na.rm = TRUE),
    Margen_DesvEst  = sd(margen_operativo, na.rm = TRUE)
  ) %>%
  pivot_longer(
    everything(),
    names_to = c("Variable", "Indicador"),
    names_sep = "_",
    values_to = "Valor"
  ) %>%
  select(-Variable) %>%
  mutate(
    # 🔥 Orden tipo dashboard
    Indicador = factor(Indicador, 
                       levels = c("Promedio", "Mediana", "Minimo", "Maximo", "DesvEst")),
    
    # 🎨 Formato porcentaje
    Valor = percent(Valor, accuracy = 0.1)
  ) %>%
  arrange(Indicador)

# 📋 Tabla final estilo dashboard
kable(
  tabla_margen,
  col.names = c("Indicador", "Margen Operativo"),
  caption = "Resumen estadístico del margen operativo",
  align = "c",
  booktabs = TRUE
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  
  # 🎨 Header
  row_spec(0, bold = TRUE, background = "#2C3E50", color = "white") %>%
  
  # 🎯 Columna margen destacada
  column_spec(2, bold = TRUE, color = "#2C7FB8")
Resumen estadístico del margen operativo
Indicador Margen Operativo
Promedio 42.3%
Mediana 41.0%
Minimo 10.0%
Maximo 80.0%
DesvEst 9.7%
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(scales)

# 📊 Datos en formato largo, usando un nombre de columna único
datos_long <- datadidas %>%
  select(margen_operativo, utilidad_operativa) %>%
  pivot_longer(
    cols = everything(),
    names_to = "tipo_variable",   # nombre único
    values_to = "valor"
  )

# 🔢 Número óptimo de bins (Sturges)
n <- nrow(datadidas)
bins_opt <- ceiling(log2(n) + 1)

# 🟡 Calcular media y mediana por variable
resumen <- datos_long %>%
  group_by(tipo_variable) %>%
  summarise(
    media = mean(valor, na.rm = TRUE),
    mediana = median(valor, na.rm = TRUE)
  )

# 📈 Gráfico interactivo
ggplotly(
  ggplot(datos_long, aes(x = valor, fill = tipo_variable)) +
    
    geom_histogram(
      bins = bins_opt,
      color = "black",
      alpha = 0.7
    ) +
    
    # 🔴 Media
    geom_vline(
      data = resumen,
      aes(xintercept = media, color = tipo_variable),
      linetype = "dashed",
      linewidth = 1
    ) +
    
    # 🟢 Mediana
    geom_vline(
      data = resumen,
      aes(xintercept = mediana, color = tipo_variable),
      linetype = "dotted",
      linewidth = 1
    ) +
    
    facet_wrap(~tipo_variable, scales = "free_x") +
    
    scale_fill_manual(values = c(
      "margen_operativo" = "#2C7FB8",
      "utilidad_operativa" = "#1A9641"
    )) +
    
    scale_color_manual(values = c(
      "margen_operativo" = "red",
      "utilidad_operativa" = "darkgreen"
    )) +
    
    labs(
      title = "Distribución comparativa: Margen vs Utilidad",
      subtitle = "Histogramas con media (línea punteada) y mediana (línea discontinua)",
      x = "Valor",
      y = "Frecuencia"
    ) +
    
    theme_minimal(base_size = 12) +
    theme(legend.position = "none")
)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(scales)

datos_long <- datadidas %>%
  select(margen_operativo, utilidad_operativa) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "valor"
  )

ggplotly(
  ggplot(datos_long, aes(x = variable, y = valor, fill = variable)) +
    
    geom_boxplot(alpha = 0.7, width = 0.5, outlier.color = "red") +
    
    facet_wrap(~variable, scales = "free_y") +
    
    # 🔥 formato correcto SIN ifelse
    scale_y_continuous(labels = comma) +
    
    scale_fill_manual(values = c(
      "margen_operativo" = "#2C7FB8",
      "utilidad_operativa" = "#1A9641"
    )) +
    
    labs(
      title = "Distribución comparativa: Margen vs Utilidad",
      subtitle = "Escalas independientes por variable",
      x = NULL,
      y = "Valor"
    ) +
    
    theme_minimal() +
    theme(legend.position = "none")
)

rentabilidad por producto

library(dplyr)
library(ggplot2)
library(plotly)
library(scales)

# 🎨 Paleta consistente tipo Adidas
colores_producto <- c(
  "Women's Street Footwear" = "#1f77b4",
  "Women's Athletic Footwear" = "#ff7f0e",
  "Women's Apparel" = "#2ca02c",
  "Men's Street Footwear" = "#d62728",
  "Men's Athletic Footwear" = "#9467bd",
  "Men's Apparel" = "#8c564b"
)

# 🟡 Calcular media por producto
media_producto <- datadidas %>%
  group_by(producto) %>%
  summarise(media = mean(margen_operativo, na.rm = TRUE))

# 📊 Gráfico mejorado
p <- ggplot(datadidas, aes(x = producto, y = margen_operativo, fill = producto)) +
  
  # Boxplot
  geom_boxplot(alpha = 0.7, outlier.color = "black") +
  
  # 🔴 Media como punto
  geom_point(
    data = media_producto,
    aes(x = producto, y = media),
    color = "red",
    size = 3
  ) +
  
  # 🔴 Línea horizontal pequeña (media)
  stat_summary(
    fun = mean,
    geom = "crossbar",
    width = 0.5,
    color = "red",
    fatten = 0
  ) +
  
  coord_flip() +
  
  scale_y_continuous(labels = percent_format()) +
  
  scale_fill_manual(values = colores_producto) +
  
  labs(
    title = "Distribución del margen por producto",
    subtitle = "Boxplot con media (punto rojo) + interactividad",
    x = "Producto",
    y = "Margen operativo"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")

# 🔥 Convertir a interactivo
ggplotly(p, tooltip = c("x", "y"))
library(dplyr)
library(ggplot2)
library(scales)

# 📊 Preparar datos
bubble_data <- datadidas %>%
  group_by(producto, metodo_venta) %>%
  summarise(
    margen = mean(margen_operativo, na.rm = TRUE),
    utilidad = sum(utilidad_operativa, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    utilidad_miles = utilidad / 1e3,
    etiqueta = paste0(round(utilidad_miles, 0), "K"),
    producto = reorder(producto, utilidad)
  )

# 🎨 Gráfico
ggplot(bubble_data, aes(
  x = margen,
  y = producto,
  size = utilidad_miles,
  color = metodo_venta
)) +
  
  # 🔵 Burbujas
  geom_point(alpha = 0.85) +
  
  # 🔥 Etiquetas en negro (más legibles)
  geom_text(
    aes(label = etiqueta),
    color = "black",
    size = 3,
    fontface = "bold",
    vjust = -0.8   # un poco arriba del punto
  ) +
  
  # 📏 Tamaño
  scale_size_continuous(
    range = c(5, 18),
    labels = function(x) paste0(round(x,0), "K"),
    name = "Utilidad (Miles)"
  ) +
  
  # 🎨 Colores
  scale_color_manual(values = c(
    "In-store" = "#E74C3C",
    "Online" = "#27AE60",
    "Outlet" = "#2980B9"
  )) +
  
  # 📊 Eje %
  scale_x_continuous(labels = percent_format()) +
  
  labs(
    title = "Rentabilidad por Producto y Método de Venta",
    subtitle = "Tamaño = Utilidad en miles",
    x = "Margen operativo",
    y = "Producto",
    color = "Método de venta"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "right"
  )

library(dplyr)
library(ggplot2)
library(scales)

# 📊 Datos
bar_data <- datadidas %>%
  group_by(producto, region) %>%
  summarise(
    margen = mean(margen_operativo, na.rm = TRUE),
    .groups = "drop"
  )

# 📊 Gráfico
ggplot(bar_data, aes(x = producto, y = margen, fill = region)) +
  
  geom_col(position = "dodge") +
  
  coord_flip() +
  
  scale_y_continuous(labels = percent_format()) +
  
  scale_fill_brewer(palette = "Set2") +
  
  labs(
    title = "Margen operativo por Producto y Región",
    subtitle = "Comparación de rentabilidad por segmento",
    x = "Producto",
    y = "Margen operativo",
    fill = "Región"
  ) +
  
  theme_minimal(base_size = 12)

Comparativo de margen operativo por método de venta y región

datadidas %>%
  group_by(metodo_venta, region) %>%
  summarise(margen_prom = mean(margen_operativo, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = region, y = margen_prom, fill = metodo_venta)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            position = position_dodge(width = 0.7),
            vjust = -0.4, size = 2.8) +
  scale_y_continuous(
    labels = scales::percent_format(),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Figura 13. Margen Operativo por Método de Venta y Región",
    x     = "Región",
    y     = "Margen Operativo Promedio",
    fill  = "Método de Venta"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "bottom",
    axis.text.x     = element_text(angle = 15, hjust = 1)
  )

Comparativo de margen operativo por método de venta y producto

datadidas %>%
  group_by(metodo_venta, producto) %>%
  summarise(margen_prom = mean(margen_operativo, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = producto, y = margen_prom, fill = metodo_venta)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            position = position_dodge(width = 0.7),
            vjust = -0.4, size = 2.8) +
  scale_y_continuous(
    labels = scales::percent_format(),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Figura 13. Margen Operativo por Método de Venta y Producto",
    x     = "Producto",
    y     = "Margen Operativo Promedio",
    fill  = "Método de Venta"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "bottom",
    axis.text.x     = element_text(angle = 15, hjust = 1)
  )

datadidas %>%
  group_by(region) %>%
  summarise(margen_prom = mean(margen_operativo)) %>%
  mutate(region = reorder(region, margen_prom)) %>%
  ggplot(aes(x = region, y = margen_prom, fill = margen_prom)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            hjust = -0.1, size = 3.5) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(),
                     expand = expansion(mult = c(0, 0.2))) +
  scale_fill_gradient(low = "#a8d5e2", high = "#1a6b8a", guide = "none") +
  labs(title = "Figura 4. Margen Operativo Promedio por Región",
       x     = "Región",
       y     = "Margen Operativo Promedio") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major.y = element_blank())

ventas_metodo <- datadidas %>%
  group_by(`Método de Venta` = metodo_venta) %>%
  summarise(
    `Ventas Totales (USD)` = sum(ventas_total, na.rm = TRUE),
    `Utilidad (USD)`       = sum(utilidad_operativa, na.rm = TRUE),
    `Margen Promedio`      = mean(margen_operativo, na.rm = TRUE),
    `Unidades Vendidas`    = sum(unidades_vendidas, na.rm = TRUE),
  ) %>%
  arrange(desc(`Ventas Totales (USD)`)) %>%
  mutate(
    `Participación`        = `Ventas Totales (USD)` / sum(`Ventas Totales (USD)`),
    `Ventas Totales (USD)` = scales::dollar(`Ventas Totales (USD)`),
    `Utilidad (USD)`       = scales::dollar(`Utilidad (USD)`),
    `Margen Promedio`      = scales::percent(`Margen Promedio`, accuracy = 0.1),
    `Unidades Vendidas`    = scales::comma(`Unidades Vendidas`),
    `Participación`        = scales::percent(`Participación`, accuracy = 0.1)
  )

kable(ventas_metodo,
      caption = "Resumen de Ventas por Método de Venta",
      booktabs = TRUE,
      align = c("l", "r", "r", "r", "r", "r", "r")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  ) %>%
  column_spec(2, bold = TRUE)
Resumen de Ventas por Método de Venta
Método de Venta Ventas Totales (USD) Utilidad (USD) Margen Promedio Unidades Vendidas Participación
Online $44,965,657 $19,552,538 46.4% 939,093 37.4%
Outlet $39,536,618 $14,913,301 39.5% 849,778 32.9%
In-store $35,664,375 $12,759,129 35.6% 689,990 29.7%
  datadidas %>%
  group_by(metodo_venta) %>%
  summarise(margen_prom = mean(margen_operativo)) %>%
  mutate(metodo = reorder(metodo_venta, margen_prom)) %>%
  ggplot(aes(x = metodo, y = margen_prom, fill = margen_prom)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            hjust = -0.1, size = 3.0) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(),
                     expand = expansion(mult = c(0, 0.2))) +
  scale_fill_gradient(low = "#f9e4b7", high = "#e67e22", guide = "none") +
  labs(title = "Figura 6. Margen Operativo Promedio por Método de Venta",
       x     = "Método de Venta",
       y     = "Margen Operativo Promedio") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major.y = element_blank())

ventas_producto <- datadidas %>%
  group_by(`Producto` = producto) %>%
  summarise(
    `Ventas Totales (USD)` = sum(ventas_total, na.rm = TRUE),
    `Utilidad (USD)`       = sum(utilidad_operativa, na.rm = TRUE),
    `Margen Promedio`      = mean(margen_operativo, na.rm = TRUE),
    `Unidades Vendidas`    = sum(unidades_vendidas, na.rm = TRUE),
  ) %>%
  arrange(desc(`Ventas Totales (USD)`)) %>%
  mutate(
    `Participación`        = `Ventas Totales (USD)` / sum(`Ventas Totales (USD)`),
    `Ventas Totales (USD)` = scales::dollar(`Ventas Totales (USD)`),
    `Utilidad (USD)`       = scales::dollar(`Utilidad (USD)`),
    `Margen Promedio`      = scales::percent(`Margen Promedio`, accuracy = 0.1),
    `Unidades Vendidas`    = scales::comma(`Unidades Vendidas`),
    `Participación`        = scales::percent(`Participación`, accuracy = 0.1)
  )

kable(ventas_producto,
      caption = "Resumen de Ventas por Producto",
      booktabs = TRUE,
      align = c("l", "r", "r", "r", "r", "r", "r")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  ) %>%
  column_spec(2, bold = TRUE)
Resumen de Ventas por Producto
Producto Ventas Totales (USD) Utilidad (USD) Margen Promedio Unidades Vendidas Participación
Men’s Street Footwear $27,680,769 $11,629,046 44.6% 593,320 23.0%
Women’s Apparel $23,870,985 $9,685,221 44.1% 433,827 19.9%
Men’s Athletic Footwear $20,577,180 $7,437,457 40.3% 435,526 17.1%
Women’s Street Footwear $17,201,563 $6,494,017 41.0% 392,269 14.3%
Men’s Apparel $16,520,632 $6,381,405 41.3% 306,683 13.7%
Women’s Athletic Footwear $14,315,521 $5,597,822 42.4% 317,236 11.9%
  datadidas %>%
  group_by(producto) %>%
  summarise(margen_prom = mean(margen_operativo)) %>%
  mutate(Producto = reorder(producto, margen_prom)) %>%
  ggplot(aes(x = Producto, y = margen_prom, fill = margen_prom)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            hjust = -0.1, size = 3.0) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(),
                     expand = expansion(mult = c(0, 0.2))) +
  scale_fill_gradient(low = "#d9d9d9", high = "#252525", guide = "none") +
  labs(title = "Figura 6. Margen Operativo Promedio por Producto",
       x     = "Producto",
       y     = "Margen Operativo Promedio") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major.y = element_blank())

Relación entre Ventas y Utilidad Operativa

Esta sección examina si un mayor volumen de ventas se traduce proporcionalmente en mayor utilidad, o si existen ineficiencias que limitan la rentabilidad.

library(ggplot2)
library(scales)

# 📊 Correlación
correlacion <- cor(datadidas$ventas_total, 
                   datadidas$utilidad_operativa, 
                   use = "complete.obs")

ggplot(datadidas, aes(x = ventas_total, y = utilidad_operativa)) +
  
  # 🔵 Puntos más suaves
  geom_point(alpha = 0.2, size = 1.2, color = "#2C3E50") +
  
  # 📈 Línea de tendencia
  geom_smooth(
    method = "lm",
    se = TRUE,
    color = "#27AE60",
    linetype = "dashed",
    linewidth = 1
  ) +
  
  # 🔥 Texto de correlación
  annotate(
    "text",
    x = Inf, y = -Inf,
    label = paste0("Correlación: ", round(correlacion, 3)),
    hjust = 1.1, vjust = -1.2,
    size = 4,
    fontface = "bold"
  ) +
  
  # 💲 Formato miles
  scale_x_continuous(labels = dollar_format(scale = 1e-3, suffix = "K")) +
  scale_y_continuous(labels = dollar_format(scale = 1e-3, suffix = "K")) +
  
  labs(
    title = "Figura 5. Ventas Totales vs. Utilidad Operativa",
    subtitle = "Relación lineal entre ventas y rentabilidad",
    x = "Ventas Totales (Miles USD)",
    y = "Utilidad Operativa (Miles USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

cor_val <- cor(datadidas$ventas_total, datadidas$utilidad_operativa)

data.frame(
  Métrica = "Correlación de Pearson (Ventas vs. Utilidad)",
  Valor   = round(cor_val, 3)
) %>%
  kable(caption = "Correlación entre Ventas Totales y Utilidad Operativa",
        booktabs = TRUE,
        align = c("l", "r")) %>%
  kable_styling(latex_options = c("striped", "hold_position"),
                font_size = 11)
Correlación entre Ventas Totales y Utilidad Operativa
Métrica Valor
Correlación de Pearson (Ventas vs. Utilidad) 0.935

Relación entre variables clave

library(ggplot2)
library(plotly)
library(scales)

# 📊 Correlación
cor_precio <- cor(datadidas$precio_unidad,
                  datadidas$unidades_vendidas,
                  use = "complete.obs")

# 📈 Gráfico interactivo
ggplotly(
  ggplot(datadidas, aes(x = precio_unidad, y = unidades_vendidas)) +
    
    # 🔵 Puntos
    geom_point(color = "#2C7FB8", alpha = 0.5, size = 2) +
    
    # 📈 Tendencia
    geom_smooth(
      method = "lm",
      color = "#D7191C",
      linetype = "dashed",
      linewidth = 1
    ) +
    
    # 📊 Formato de ejes consistente
    scale_x_continuous(labels = dollar_format()) +
    scale_y_continuous(labels = comma_format()) +
    
    labs(
      title = paste0(
        "Figura X. Relación entre Precio por Unidad y Unidades Vendidas\n",
        "Coeficiente de correlación: ", round(cor_precio, 3)
      ),
      subtitle = "Análisis del comportamiento de la demanda",
      x = "Precio por unidad (USD)",
      y = "Unidades vendidas",
      caption = "Fuente: Elaboración propia con base en datos de Adidas"
    ) +
    
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
      plot.subtitle = element_text(size = 11, hjust = 0.5),
      axis.title = element_text(size = 11),
      axis.text = element_text(size = 10),
      panel.grid.minor = element_blank()
    ),
  
  tooltip = c("x", "y")
)

¿El precio influye en la rentabilidad?

library(ggplot2)
library(plotly)
library(scales)

# 📊 Correlación
cor_precio_margen <- cor(datadidas$precio_unidad,
                         datadidas$margen_operativo,
                         use = "complete.obs")

# 📈 Gráfico
ggplotly(
  ggplot(datadidas, aes(x = precio_unidad, y = margen_operativo)) +
    
    geom_point(color = "#2C7FB8", alpha = 0.5, size = 2) +
    
    geom_smooth(
      method = "lm",
      color = "#D7191C",
      linetype = "dashed",
      linewidth = 1
    ) +
    
    scale_x_continuous(labels = dollar_format()) +
    scale_y_continuous(labels = percent_format()) +
    
    labs(
      title = paste0(
        "Figura X. Relación entre Precio por Unidad y Margen Operativo\n",
        "Coeficiente de correlación: ", round(cor_precio_margen, 3)
      ),
      subtitle = "Análisis del impacto del precio en la rentabilidad",
      x = "Precio por unidad (USD)",
      y = "Margen operativo (%)",
      caption = "Fuente: Elaboración propia con base en datos de Adidas"
    ) +
    
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
      plot.subtitle = element_text(size = 11, hjust = 0.5),
      panel.grid.minor = element_blank()
    ),
  
  tooltip = c("x", "y")
)

• ¿Qué variables parecen estar más asociadas con la utilidad operativa?

library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)

# 📊 Matriz de correlación
datos_corr <- datadidas %>%
  select(precio_unidad, unidades_vendidas, ventas_total, utilidad_operativa, margen_operativo)

cor_mat <- round(cor(datos_corr, use = "complete.obs"), 3)

# 🔄 Convertir a formato largo
cor_long <- cor_mat %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  pivot_longer(-var1, names_to = "var2", values_to = "cor") %>%
  
  # 🔥 Filtrar solo triángulo inferior
  filter(as.numeric(factor(var1)) > as.numeric(factor(var2)))

# 🎨 Heatmap estilo dashboard
ggplot(cor_long, aes(x = var2, y = var1, fill = cor)) +
  
  geom_tile(color = "white") +
  
  # 🔢 Valores dentro
  geom_text(aes(label = round(cor, 2)), size = 4, fontface = "bold") +
  
  # 🎨 Escala divergente (ROJO - BLANCO - VERDE)
  scale_fill_gradient2(
    low = "#D73027",   # negativo
    mid = "white",
    high = "#1A9850",  # positivo
    midpoint = 0,
    limits = c(-1, 1),
    name = "Correlación"
  ) +
  
  labs(
    title = "Matriz de correlación entre variables clave",
    subtitle = "Relaciones entre precio, volumen y rentabilidad",
    x = NULL,
    y = NULL
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold"),
    legend.position = "right"
  )