Introducción

PASO 1. Cargar librerias

# Cargar librerías necesarias
library(readxl)
library(dplyr)
library(ggplot2)
library(plotly)
library(knitr)
library(kableExtra)
library(tidyr)
library(tidyverse)

PASO 2: Cargar la base de datos

# Cargar datos desde el archivo Excel
datadidas <- read_excel("C:/Users/daniel.medina/Documents/maestria/Analisis Financiero/taller 1/DatosCaso1.xlsx")

para observar las variables y las primeras filas de la base de datos

# Ver las primeras filas de los datos
class(datadidas)

## [1] "tbl_df"     "tbl"        "data.frame"

colnames(datadidas)  #listar variables

##  [1] "distribuidor"       "region"             "estado"            
##  [4] "ciudad"             "producto"           "precio_unidad"     
##  [7] "unidades_vendidas"  "ventas_total"       "utilidad_operativa"
## [10] "margen_operativo"   "metodo_venta"

head(datadidas)  # primeras lineas de la base de datos

distribuidor	region	estado	ciudad	producto	precio_unidad	unidades_vendidas	ventas_total	utilidad_operativa	margen_operativo	metodo_venta
Foot Locker	Northeast	New York	New York	Men’s Street Footwear	50	1200	60000	30000.0	0.50	In-store
Foot Locker	Northeast	New York	New York	Men’s Athletic Footwear	50	1000	50000	15000.0	0.30	In-store
Foot Locker	Northeast	New York	New York	Women’s Street Footwear	40	1000	40000	14000.0	0.35	In-store
Foot Locker	Northeast	New York	New York	Women’s Athletic Footwear	45	850	38250	13387.5	0.35	In-store
Foot Locker	Northeast	New York	New York	Men’s Apparel	60	900	54000	16200.0	0.30	In-store
Foot Locker	Northeast	New York	New York	Women’s Apparel	50	1000	50000	12500.0	0.25	In-store

# Ver estructura
str(datadidas)

## tibble [9,648 × 11] (S3: tbl_df/tbl/data.frame)
##  $ distribuidor      : chr [1:9648] "Foot Locker" "Foot Locker" "Foot Locker" "Foot Locker" ...
##  $ region            : chr [1:9648] "Northeast" "Northeast" "Northeast" "Northeast" ...
##  $ estado            : chr [1:9648] "New York" "New York" "New York" "New York" ...
##  $ ciudad            : chr [1:9648] "New York" "New York" "New York" "New York" ...
##  $ producto          : chr [1:9648] "Men's Street Footwear" "Men's Athletic Footwear" "Women's Street Footwear" "Women's Athletic Footwear" ...
##  $ precio_unidad     : num [1:9648] 50 50 40 45 60 50 50 50 40 45 ...
##  $ unidades_vendidas : num [1:9648] 1200 1000 1000 850 900 1000 1250 900 950 825 ...
##  $ ventas_total      : num [1:9648] 60000 50000 40000 38250 54000 ...
##  $ utilidad_operativa: num [1:9648] 30000 15000 14000 13388 16200 ...
##  $ margen_operativo  : num [1:9648] 0.5 0.3 0.35 0.35 0.3 0.25 0.5 0.3 0.35 0.35 ...
##  $ metodo_venta      : chr [1:9648] "In-store" "In-store" "In-store" "In-store" ...

# Ver primeras filas
head(datadidas)

distribuidor	region	estado	ciudad	producto	precio_unidad	unidades_vendidas	ventas_total	utilidad_operativa	margen_operativo	metodo_venta
Foot Locker	Northeast	New York	New York	Men’s Street Footwear	50	1200	60000	30000.0	0.50	In-store
Foot Locker	Northeast	New York	New York	Men’s Athletic Footwear	50	1000	50000	15000.0	0.30	In-store
Foot Locker	Northeast	New York	New York	Women’s Street Footwear	40	1000	40000	14000.0	0.35	In-store
Foot Locker	Northeast	New York	New York	Women’s Athletic Footwear	45	850	38250	13387.5	0.35	In-store
Foot Locker	Northeast	New York	New York	Men’s Apparel	60	900	54000	16200.0	0.30	In-store
Foot Locker	Northeast	New York	New York	Women’s Apparel	50	1000	50000	12500.0	0.25	In-store

# Resumen general
summary(datadidas)

##  distribuidor          region             estado             ciudad         
##  Length:9648        Length:9648        Length:9648        Length:9648       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    producto         precio_unidad    unidades_vendidas  ventas_total  
##  Length:9648        Min.   :  7.00   Min.   :   0.0    Min.   :    0  
##  Class :character   1st Qu.: 35.00   1st Qu.: 106.0    1st Qu.: 4065  
##  Mode  :character   Median : 45.00   Median : 176.0    Median : 7804  
##                     Mean   : 45.22   Mean   : 256.9    Mean   :12455  
##                     3rd Qu.: 55.00   3rd Qu.: 350.0    3rd Qu.:15864  
##                     Max.   :110.00   Max.   :1275.0    Max.   :82500  
##  utilidad_operativa margen_operativo metodo_venta      
##  Min.   :    0      Min.   :0.100    Length:9648       
##  1st Qu.: 1753      1st Qu.:0.350    Class :character  
##  Median : 3263      Median :0.410    Mode  :character  
##  Mean   : 4895      Mean   :0.423                      
##  3rd Qu.: 6192      3rd Qu.:0.490                      
##  Max.   :39000      Max.   :0.800

Análisis del desempeño de ventas

Esta sección presenta el comportamiento de las ventas totales y por producto según la región, referencia del producto, distribuidor y método de venta, permitiendo identificar los mercados de mayor relevancia estratégica.

lo mismo de arriba pero con otro diseño

library(dplyr)
library(tidyr)
library(scales)
library(kableExtra)

margen_stats <- datadidas %>%
  summarise(
    Ventas_Promedio  = mean(ventas_total, na.rm = TRUE),
    Ventas_Mediana   = median(ventas_total, na.rm = TRUE),
    Ventas_Mínimo    = min(ventas_total, na.rm = TRUE),
    Ventas_Máximo    = max(ventas_total, na.rm = TRUE),
    Ventas_DesvEst   = sd(ventas_total, na.rm = TRUE),

    Unidades_Promedio = mean(unidades_vendidas, na.rm = TRUE),
    Unidades_Mediana  = median(unidades_vendidas, na.rm = TRUE),
    Unidades_Mínimo   = min(unidades_vendidas, na.rm = TRUE),
    Unidades_Máximo   = max(unidades_vendidas, na.rm = TRUE),
    Unidades_DesvEst  = sd(unidades_vendidas, na.rm = TRUE)
  ) %>%
  pivot_longer(everything(),
               names_to = c("Variable", "Indicador"),
               names_sep = "_",
               values_to = "Valor") %>%
  pivot_wider(names_from = Variable, values_from = Valor) %>%
  mutate(
    Ventas = scales::dollar(Ventas),
    Unidades = scales::number(Unidades)
  )

kable(margen_stats,
      caption = " Ventas vs Unidades",
      booktabs = TRUE,
      align = "c") %>%
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE, background = "#2C3E50", color = "white") %>%
  column_spec(2, bold = TRUE, color = "green") %>%
  column_spec(3, bold = TRUE, color = "blue")

Ventas vs Unidades
Indicador	Ventas	Unidades
Promedio	$12,455.08	257
Mediana	$7,803.50	176
Mínimo	$0.00	0
Máximo	$82,500.00	1 275
DesvEst	$12,716.39	214

ventas_ciudad <- datadidas %>%
  group_by(Ciudad = ciudad) %>%
  summarise(
    Ventas = sum(ventas_total, na.rm = TRUE),
    Unidades = sum(unidades_vendidas, na.rm = TRUE),
    Registros = n()
  ) %>%
  arrange(desc(Ventas)) %>%
  mutate(
    Participacion = Ventas / sum(Ventas),
    Ranking = row_number()
  )

library(kableExtra)

ventas_ciudad %>%
  mutate(
    Ventas = scales::dollar(Ventas),
    Unidades = scales::comma(Unidades),
    Participacion = scales::percent(Participacion)
  ) %>%
  kable(
    caption = "📊 Dashboard de Ventas por Ciudad",
    align = "c",
    booktabs = TRUE
  ) %>%
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE, background = "#1F4E79", color = "white") %>%
  column_spec(2, color = "green", bold = TRUE) %>%
  column_spec(5, color = "blue") %>%
  column_spec(6, bold = TRUE)

📊 Dashboard de Ventas por Ciudad
Ciudad	Ventas	Unidades	Registros	Participacion	Ranking
New York	$5,676,160	111,954	216	4.72357%	1
San Francisco	$4,929,220	86,900	216	4.10199%	2
Charleston	$4,904,272	102,483	288	4.08123%	3
Portland	$4,176,777	72,946	360	3.47582%	4
Orlando	$3,946,476	60,295	216	3.28417%	5
Miami	$3,874,113	73,135	144	3.22395%	6
Los Angeles	$3,651,288	76,384	216	3.03852%	7
Houston	$3,629,632	90,322	216	3.02050%	8
New Orleans	$3,377,031	57,615	216	2.81029%	9
Seattle	$3,222,093	46,611	144	2.68135%	10
Richmond	$3,074,415	52,969	216	2.55846%	11
Albany	$2,994,304	57,186	144	2.49179%	12
Dallas	$2,982,739	69,178	216	2.48217%	13
Las Vegas	$2,981,134	51,831	216	2.48083%	14
Charlotte	$2,936,581	62,936	144	2.44376%	15
Albuquerque	$2,824,641	52,633	216	2.35060%	16
Boise	$2,742,753	63,827	216	2.28246%	17
Honolulu	$2,734,457	40,375	144	2.27555%	18
Atlanta	$2,708,591	56,391	216	2.25403%	19
Denver	$2,569,036	41,378	144	2.13789%	20
Knoxville	$2,567,190	66,077	216	2.13636%	21
Birmingham	$2,513,424	63,327	216	2.09162%	22
Manchester	$2,339,267	40,812	216	1.94669%	23
Detroit	$2,287,283	50,095	144	1.90343%	24
Cheyenne	$2,282,342	50,228	144	1.89931%	25
Columbus	$2,269,283	47,781	144	1.88845%	26
Phoenix	$2,254,096	46,919	216	1.87581%	27
Jackson	$2,218,609	56,814	216	1.84628%	28
Burlington	$2,041,598	38,685	216	1.69897%	29
Billings	$1,930,761	42,713	144	1.60674%	30
Anchorage	$1,810,428	30,815	144	1.50660%	31
Little Rock	$1,802,672	48,468	216	1.50014%	32
Hartford	$1,646,448	34,696	216	1.37014%	33
Boston	$1,578,435	32,895	216	1.31354%	34
Oklahoma City	$1,512,059	40,459	216	1.25830%	35
Wilmington	$1,508,537	30,275	144	1.25537%	36
Philadelphia	$1,478,794	27,662	216	1.23062%	37
Salt Lake City	$1,387,620	48,548	216	1.15475%	38
Louisville	$1,241,148	28,664	144	1.03286%	39
Wichita	$1,225,314	29,463	144	1.01968%	40
Newark	$1,220,446	26,540	144	1.01563%	41
Chicago	$1,204,063	25,407	144	1.00199%	42
Providence	$1,202,256	27,473	216	1.00049%	43
St. Louis	$1,189,515	36,404	144	0.98989%	44
Indianapolis	$1,084,723	26,332	144	0.90268%	45
Sioux Falls	$1,041,101	22,973	144	0.86638%	46
Baltimore	$951,134	20,818	144	0.79151%	47
Fargo	$950,930	22,781	144	0.79134%	48
Milwaukee	$948,894	23,950	144	0.78965%	49
Des Moines	$909,811	23,446	144	0.75712%	50
Minneapolis	$903,918	20,838	144	0.75222%	51
Omaha	$728,838	19,154	144	0.60652%	52

library(dplyr)
library(ggplot2)
library(sf)

coordenadas <- data.frame(
  Ciudad = c(
    "New York","San Francisco","Charleston","Portland","Orlando","Miami",
    "Los Angeles","Houston","New Orleans","Seattle","Richmond","Albany",
    "Dallas","Las Vegas","Charlotte","Albuquerque","Boise","Honolulu",
    "Atlanta","Denver","Knoxville","Birmingham","Manchester","Detroit",
    "Cheyenne","Columbus","Phoenix","Jackson","Burlington","Billings",
    "Anchorage","Little Rock","Hartford","Boston","Oklahoma City",
    "Wilmington","Philadelphia","Salt Lake City","Louisville","Wichita",
    "Newark","Chicago","Providence","St. Louis","Indianapolis",
    "Sioux Falls","Baltimore",
    "Fargo","Milwaukee","Des Moines","Minneapolis","Omaha"
  ),
  lat = c(
    40.7128,37.7749,32.7765,45.5152,28.5383,25.7617,
    34.0522,29.7604,29.9511,47.6062,37.5407,42.6526,
    32.7767,36.1699,35.2271,35.0844,43.6150,21.3069,
    33.7490,39.7392,35.9606,33.5186,42.9956,42.3314,
    41.1400,39.9612,33.4484,32.2988,44.4759,45.7833,
    61.2181,34.7465,41.7658,42.3601,35.4676,
    34.2257,39.9526,40.7608,38.2527,37.6872,
    40.7357,41.8781,41.8240,38.6270,39.7684,
    43.5446,39.2904,
    46.8772,43.0389,41.5868,44.9778,41.2565
  ),
  lon = c(
    -74.0060,-122.4194,-79.9311,-122.6784,-81.3792,-80.1918,
    -118.2437,-95.3698,-90.0715,-122.3321,-77.4360,-73.7562,
    -96.7970,-115.1398,-80.8431,-106.6504,-116.2023,-157.8583,
    -84.3880,-104.9903,-83.9207,-86.8104,-71.4548,-83.0458,
    -104.8202,-82.9988,-112.0740,-90.1848,-73.2121,-108.5007,
    -149.9003,-92.2896,-72.6734,-71.0589,-97.5164,
    -77.9447,-75.1652,-111.8910,-85.7585,-97.3301,
    -74.1724,-87.6298,-71.4128,-90.1994,-86.1581,
    -96.7311,-76.6122,
    -96.7898,-87.9065,-93.6250,-93.2650,-95.9345
  )
)

“Se optó por una visualización simplificada eliminando elementos innecesarios como ejes y reduciendo el ruido visual, con el fin de resaltar las ciudades de mayor contribución. La representación en millones facilita la interpretación comparativa y permite identificar claramente la concentración del mercado.”

ANALISIS POR REGIONES

library(dplyr)
library(ggplot2)
library(scales)

ggplot(datadidas %>%
         group_by(region) %>%
         summarise(ventas = sum(ventas_total, na.rm = TRUE)) %>%
         arrange(desc(ventas)) %>%
         mutate(
           porcentaje = ventas / sum(ventas),
           region = reorder(region, ventas)
         ),
       aes(x = region, y = ventas, fill = region)) +
  
  geom_col(width = 0.6) +
  
  geom_text(aes(label = paste0(
    dollar(ventas, scale = 1e-6, suffix = "M"),
    " | ", percent(porcentaje, accuracy = 0.1)
  )),
  hjust = -0.1,
  size = 4,
  fontface = "bold") +
  
  coord_flip() +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.25))
  ) +
  
  # 🎨 COLORES CON LÓGICA GEOGRÁFICA
  scale_fill_manual(values = c(
    "West" = "#2C7FB8",       # azul
    "South" = "#D7191C",      # rojo
    "Midwest" = "#1A9641",    # verde
    "Northeast" = "#FDAE61"   # dorado
  )) +
  
  labs(
    title = "Concentración de ventas por región",
    subtitle = "Análisis comparativo de participación (%) y volumen (millones USD)",
    x = NULL,
    y = "Ventas (Millones USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.background = element_rect(fill = "white", color = NA),
    legend.position = "none" # opcional (puedes poner "right")
  )

library(dplyr)
library(ggplot2)
library(scales)
library(stringr)

# 📊 Preparación de datos
datos_plot <- datadidas %>%
  group_by(producto) %>%
  summarise(
    ventas = sum(ventas_total, na.rm = TRUE)
  ) %>%
  arrange(desc(ventas)) %>%
  mutate(
    porcentaje = ventas / sum(ventas),
    
    # 🔥 Clasificación corregida
    categoria = case_when(
      str_detect(producto, regex("Shoes|Sneaker|Footwear", ignore_case = TRUE)) ~ "Calzado",
      str_detect(producto, regex("Shirt|Jacket|Hoodie|Shorts|Pants|Apparel", ignore_case = TRUE)) ~ "Ropa",
      TRUE ~ "Accesorios"
    ),
    
    # 🔄 Orden para gráfico
    producto = reorder(producto, ventas)
  )

# 📈 Gráfico final
ggplot(datos_plot, aes(x = producto, y = ventas, fill = categoria)) +
  
  geom_col(width = 0.6) +
  
  # 🧾 Etiquetas (valor + %)
  geom_text(
    aes(label = ifelse(
      is.na(porcentaje),
      dollar(ventas, scale = 1e-6, suffix = "M"),
      paste0(
        dollar(ventas, scale = 1e-6, suffix = "M"),
        " | ",
        percent(porcentaje, accuracy = 0.1)
      )
    )),
    hjust = -0.2,
    size = 4,
    fontface = "bold"
  ) +
  
  # 🔥 SOLUCIÓN AL CORTE
  coord_flip(clip = "off") +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.35))  # 👈 más espacio para etiquetas
  ) +
  
  # 🎨 Colores estratégicos
  scale_fill_manual(values = c(
    "Calzado" = "#2C7FB8",
    "Ropa" = "#1A9641",
    "Accesorios" = "#FDAE61"
  )) +
  
  labs(
    title = "Distribución de ventas por línea y categoría de producto",
    subtitle = "Clasificación estratégica: Calzado, Ropa y Accesorios",
    x = NULL,
    y = "Ventas (Millones USD)",
    fill = "Categoría"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    legend.position = "top",
    
    # 👇 CLAVE para evitar cortes visuales
    plot.margin = margin(10, 50, 10, 10),
    
    plot.background = element_rect(fill = "white", color = NA)
  )

library(dplyr)
library(ggplot2)
library(scales)

# 📊 Preparación de datos
datos_retail <- datadidas %>%
  group_by(distribuidor) %>%
  summarise(
    ventas = sum(ventas_total, na.rm = TRUE)
  ) %>%
  arrange(desc(ventas)) %>%
  mutate(
    participacion = ventas / sum(ventas),
    
    # 🔥 Color único por retailer
    color_marca = case_when(
      distribuidor == "Amazon" ~ "#FF9900",        # naranja Amazon
      distribuidor == "Walmart" ~ "#0071CE",       # azul Walmart
      distribuidor == "Foot Locker" ~ "#000000",   # negro Foot Locker
      distribuidor == "West Gear" ~ "#6A1B9A",     # morado (distintivo)
      distribuidor == "Sports Direct" ~ "#E41A1C", # rojo fuerte (deporte)
      distribuidor == "Kohl's" ~ "#4B2E83",        # morado oscuro retail
      TRUE ~ "#999999"  # fallback (por seguridad)
    ),
    
    distribuidor = reorder(distribuidor, ventas)
  )

# 📈 Gráfico
ggplot(datos_retail, aes(x = distribuidor, y = ventas, fill = color_marca)) +
  
  geom_col(width = 0.6) +
  
  geom_text(
    aes(label = paste0(
      dollar(ventas, scale = 1e-6, suffix = "M"),
      " | ",
      percent(participacion, accuracy = 0.1)
    )),
    hjust = -0.2,
    size = 4,
    fontface = "bold"
  ) +
  
  coord_flip(clip = "off") +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.35))
  ) +
  
  # 🎨 Colores exactos definidos manualmente
  scale_fill_identity() +
  
  labs(
    title = "Distribución de ventas por retailer",
    subtitle = "Asignación de colores por identidad de marca",
    x = NULL,
    y = "Ventas (Millones USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    plot.margin = margin(10, 50, 10, 10),
    plot.background = element_rect(fill = "white", color = NA)
  )

library(dplyr)
library(ggplot2)
library(scales)
library(stringr)

# 📊 Preparación de datos
datos_metodo <- datadidas %>%
  group_by(metodo_venta) %>%
  summarise(
    ventas = sum(ventas_total, na.rm = TRUE)
  ) %>%
  arrange(desc(ventas)) %>%
  mutate(
    participacion = ventas / sum(ventas),
    
    # 🔥 Colores estratégicos por método de venta
    color_metodo = case_when(
      str_detect(metodo_venta, regex("Online|Web|E-commerce", ignore_case = TRUE)) ~ "#2C7FB8",  # azul digital
      str_detect(metodo_venta, regex("Store|In-store|Retail", ignore_case = TRUE)) ~ "#1A9641",  # verde físico
      str_detect(metodo_venta, regex("Outlet", ignore_case = TRUE)) ~ "#FDAE61",                # naranja outlet
      TRUE ~ "#7B3294"  # otros métodos (morado distintivo)
    ),
    
    metodo_venta = reorder(metodo_venta, ventas)
  )

# 📈 Gráfico
ggplot(datos_metodo, aes(x = metodo_venta, y = ventas, fill = color_metodo)) +
  
  geom_col(width = 0.6) +
  
  # 🧾 Etiquetas consistentes
  geom_text(
    aes(label = paste0(
      dollar(ventas, scale = 1e-6, suffix = "M"),
      " | ",
      percent(participacion, accuracy = 0.1)
    )),
    hjust = -0.2,
    size = 4,
    fontface = "bold"
  ) +
  
  # 🔥 Evita cortes
  coord_flip(clip = "off") +
  
  scale_y_continuous(
    labels = dollar_format(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0, 0.35))
  ) +
  
  # 🎨 Colores definidos manualmente
  scale_fill_identity() +
  
  labs(
    title = "Distribución de ventas por método de venta",
    subtitle = "Comparación entre canales de comercialización",
    x = NULL,
    y = "Ventas (Millones USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11),
    plot.margin = margin(10, 50, 10, 10),
    plot.background = element_rect(fill = "white", color = NA)
  )

Análisis de Rentabilidad

El margen operativo mide qué proporción de las ventas se convierte en utilidad, independientemente del volumen vendido. Es el indicador más relevante para evaluar la eficiencia y sostenibilidad del negocio.

library(dplyr)
library(tidyr)
library(scales)
library(kableExtra)

# 📊 Tabla de margen tipo dashboard
tabla_margen <- datadidas %>%
  summarise(
    Margen_Promedio = mean(margen_operativo, na.rm = TRUE),
    Margen_Mediana  = median(margen_operativo, na.rm = TRUE),
    Margen_Minimo   = min(margen_operativo, na.rm = TRUE),
    Margen_Maximo   = max(margen_operativo, na.rm = TRUE),
    Margen_DesvEst  = sd(margen_operativo, na.rm = TRUE)
  ) %>%
  pivot_longer(
    everything(),
    names_to = c("Variable", "Indicador"),
    names_sep = "_",
    values_to = "Valor"
  ) %>%
  select(-Variable) %>%
  mutate(
    # 🔥 Orden tipo dashboard
    Indicador = factor(Indicador, 
                       levels = c("Promedio", "Mediana", "Minimo", "Maximo", "DesvEst")),
    
    # 🎨 Formato porcentaje
    Valor = percent(Valor, accuracy = 0.1)
  ) %>%
  arrange(Indicador)

# 📋 Tabla final estilo dashboard
kable(
  tabla_margen,
  col.names = c("Indicador", "Margen Operativo"),
  caption = "Resumen estadístico del margen operativo",
  align = "c",
  booktabs = TRUE
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  
  # 🎨 Header
  row_spec(0, bold = TRUE, background = "#2C3E50", color = "white") %>%
  
  # 🎯 Columna margen destacada
  column_spec(2, bold = TRUE, color = "#2C7FB8")

Resumen estadístico del margen operativo
Indicador	Margen Operativo
Promedio	42.3%
Mediana	41.0%
Minimo	10.0%
Maximo	80.0%
DesvEst	9.7%

library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(scales)

# 📊 Datos en formato largo, usando un nombre de columna único
datos_long <- datadidas %>%
  select(margen_operativo, utilidad_operativa) %>%
  pivot_longer(
    cols = everything(),
    names_to = "tipo_variable",   # nombre único
    values_to = "valor"
  )

# 🔢 Número óptimo de bins (Sturges)
n <- nrow(datadidas)
bins_opt <- ceiling(log2(n) + 1)

# 🟡 Calcular media y mediana por variable
resumen <- datos_long %>%
  group_by(tipo_variable) %>%
  summarise(
    media = mean(valor, na.rm = TRUE),
    mediana = median(valor, na.rm = TRUE)
  )

# 📈 Gráfico interactivo
ggplotly(
  ggplot(datos_long, aes(x = valor, fill = tipo_variable)) +
    
    geom_histogram(
      bins = bins_opt,
      color = "black",
      alpha = 0.7
    ) +
    
    # 🔴 Media
    geom_vline(
      data = resumen,
      aes(xintercept = media, color = tipo_variable),
      linetype = "dashed",
      linewidth = 1
    ) +
    
    # 🟢 Mediana
    geom_vline(
      data = resumen,
      aes(xintercept = mediana, color = tipo_variable),
      linetype = "dotted",
      linewidth = 1
    ) +
    
    facet_wrap(~tipo_variable, scales = "free_x") +
    
    scale_fill_manual(values = c(
      "margen_operativo" = "#2C7FB8",
      "utilidad_operativa" = "#1A9641"
    )) +
    
    scale_color_manual(values = c(
      "margen_operativo" = "red",
      "utilidad_operativa" = "darkgreen"
    )) +
    
    labs(
      title = "Distribución comparativa: Margen vs Utilidad",
      subtitle = "Histogramas con media (línea punteada) y mediana (línea discontinua)",
      x = "Valor",
      y = "Frecuencia"
    ) +
    
    theme_minimal(base_size = 12) +
    theme(legend.position = "none")
)

library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(scales)

datos_long <- datadidas %>%
  select(margen_operativo, utilidad_operativa) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "valor"
  )

ggplotly(
  ggplot(datos_long, aes(x = variable, y = valor, fill = variable)) +
    
    geom_boxplot(alpha = 0.7, width = 0.5, outlier.color = "red") +
    
    facet_wrap(~variable, scales = "free_y") +
    
    # 🔥 formato correcto SIN ifelse
    scale_y_continuous(labels = comma) +
    
    scale_fill_manual(values = c(
      "margen_operativo" = "#2C7FB8",
      "utilidad_operativa" = "#1A9641"
    )) +
    
    labs(
      title = "Distribución comparativa: Margen vs Utilidad",
      subtitle = "Escalas independientes por variable",
      x = NULL,
      y = "Valor"
    ) +
    
    theme_minimal() +
    theme(legend.position = "none")
)

rentabilidad por producto

library(dplyr)
library(ggplot2)
library(plotly)
library(scales)

# 🎨 Paleta consistente tipo Adidas
colores_producto <- c(
  "Women's Street Footwear" = "#1f77b4",
  "Women's Athletic Footwear" = "#ff7f0e",
  "Women's Apparel" = "#2ca02c",
  "Men's Street Footwear" = "#d62728",
  "Men's Athletic Footwear" = "#9467bd",
  "Men's Apparel" = "#8c564b"
)

# 🟡 Calcular media por producto
media_producto <- datadidas %>%
  group_by(producto) %>%
  summarise(media = mean(margen_operativo, na.rm = TRUE))

# 📊 Gráfico mejorado
p <- ggplot(datadidas, aes(x = producto, y = margen_operativo, fill = producto)) +
  
  # Boxplot
  geom_boxplot(alpha = 0.7, outlier.color = "black") +
  
  # 🔴 Media como punto
  geom_point(
    data = media_producto,
    aes(x = producto, y = media),
    color = "red",
    size = 3
  ) +
  
  # 🔴 Línea horizontal pequeña (media)
  stat_summary(
    fun = mean,
    geom = "crossbar",
    width = 0.5,
    color = "red",
    fatten = 0
  ) +
  
  coord_flip() +
  
  scale_y_continuous(labels = percent_format()) +
  
  scale_fill_manual(values = colores_producto) +
  
  labs(
    title = "Distribución del margen por producto",
    subtitle = "Boxplot con media (punto rojo) + interactividad",
    x = "Producto",
    y = "Margen operativo"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")

# 🔥 Convertir a interactivo
ggplotly(p, tooltip = c("x", "y"))

library(dplyr)
library(ggplot2)
library(scales)

# 📊 Preparar datos
bubble_data <- datadidas %>%
  group_by(producto, metodo_venta) %>%
  summarise(
    margen = mean(margen_operativo, na.rm = TRUE),
    utilidad = sum(utilidad_operativa, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    utilidad_miles = utilidad / 1e3,
    etiqueta = paste0(round(utilidad_miles, 0), "K"),
    producto = reorder(producto, utilidad)
  )

# 🎨 Gráfico
ggplot(bubble_data, aes(
  x = margen,
  y = producto,
  size = utilidad_miles,
  color = metodo_venta
)) +
  
  # 🔵 Burbujas
  geom_point(alpha = 0.85) +
  
  # 🔥 Etiquetas en negro (más legibles)
  geom_text(
    aes(label = etiqueta),
    color = "black",
    size = 3,
    fontface = "bold",
    vjust = -0.8   # un poco arriba del punto
  ) +
  
  # 📏 Tamaño
  scale_size_continuous(
    range = c(5, 18),
    labels = function(x) paste0(round(x,0), "K"),
    name = "Utilidad (Miles)"
  ) +
  
  # 🎨 Colores
  scale_color_manual(values = c(
    "In-store" = "#E74C3C",
    "Online" = "#27AE60",
    "Outlet" = "#2980B9"
  )) +
  
  # 📊 Eje %
  scale_x_continuous(labels = percent_format()) +
  
  labs(
    title = "Rentabilidad por Producto y Método de Venta",
    subtitle = "Tamaño = Utilidad en miles",
    x = "Margen operativo",
    y = "Producto",
    color = "Método de venta"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "right"
  )

library(dplyr)
library(ggplot2)
library(scales)

# 📊 Datos
bar_data <- datadidas %>%
  group_by(producto, region) %>%
  summarise(
    margen = mean(margen_operativo, na.rm = TRUE),
    .groups = "drop"
  )

# 📊 Gráfico
ggplot(bar_data, aes(x = producto, y = margen, fill = region)) +
  
  geom_col(position = "dodge") +
  
  coord_flip() +
  
  scale_y_continuous(labels = percent_format()) +
  
  scale_fill_brewer(palette = "Set2") +
  
  labs(
    title = "Margen operativo por Producto y Región",
    subtitle = "Comparación de rentabilidad por segmento",
    x = "Producto",
    y = "Margen operativo",
    fill = "Región"
  ) +
  
  theme_minimal(base_size = 12)

Comparativo de margen operativo por método de venta y región

datadidas %>%
  group_by(metodo_venta, region) %>%
  summarise(margen_prom = mean(margen_operativo, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = region, y = margen_prom, fill = metodo_venta)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            position = position_dodge(width = 0.7),
            vjust = -0.4, size = 2.8) +
  scale_y_continuous(
    labels = scales::percent_format(),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Figura 13. Margen Operativo por Método de Venta y Región",
    x     = "Región",
    y     = "Margen Operativo Promedio",
    fill  = "Método de Venta"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "bottom",
    axis.text.x     = element_text(angle = 15, hjust = 1)
  )

Comparativo de margen operativo por método de venta y producto

datadidas %>%
  group_by(metodo_venta, producto) %>%
  summarise(margen_prom = mean(margen_operativo, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = producto, y = margen_prom, fill = metodo_venta)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            position = position_dodge(width = 0.7),
            vjust = -0.4, size = 2.8) +
  scale_y_continuous(
    labels = scales::percent_format(),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Figura 13. Margen Operativo por Método de Venta y Producto",
    x     = "Producto",
    y     = "Margen Operativo Promedio",
    fill  = "Método de Venta"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "bottom",
    axis.text.x     = element_text(angle = 15, hjust = 1)
  )

datadidas %>%
  group_by(region) %>%
  summarise(margen_prom = mean(margen_operativo)) %>%
  mutate(region = reorder(region, margen_prom)) %>%
  ggplot(aes(x = region, y = margen_prom, fill = margen_prom)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            hjust = -0.1, size = 3.5) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(),
                     expand = expansion(mult = c(0, 0.2))) +
  scale_fill_gradient(low = "#a8d5e2", high = "#1a6b8a", guide = "none") +
  labs(title = "Figura 4. Margen Operativo Promedio por Región",
       x     = "Región",
       y     = "Margen Operativo Promedio") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major.y = element_blank())

ventas_metodo <- datadidas %>%
  group_by(`Método de Venta` = metodo_venta) %>%
  summarise(
    `Ventas Totales (USD)` = sum(ventas_total, na.rm = TRUE),
    `Utilidad (USD)`       = sum(utilidad_operativa, na.rm = TRUE),
    `Margen Promedio`      = mean(margen_operativo, na.rm = TRUE),
    `Unidades Vendidas`    = sum(unidades_vendidas, na.rm = TRUE),
  ) %>%
  arrange(desc(`Ventas Totales (USD)`)) %>%
  mutate(
    `Participación`        = `Ventas Totales (USD)` / sum(`Ventas Totales (USD)`),
    `Ventas Totales (USD)` = scales::dollar(`Ventas Totales (USD)`),
    `Utilidad (USD)`       = scales::dollar(`Utilidad (USD)`),
    `Margen Promedio`      = scales::percent(`Margen Promedio`, accuracy = 0.1),
    `Unidades Vendidas`    = scales::comma(`Unidades Vendidas`),
    `Participación`        = scales::percent(`Participación`, accuracy = 0.1)
  )

kable(ventas_metodo,
      caption = "Resumen de Ventas por Método de Venta",
      booktabs = TRUE,
      align = c("l", "r", "r", "r", "r", "r", "r")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  ) %>%
  column_spec(2, bold = TRUE)

Resumen de Ventas por Método de Venta
Método de Venta	Ventas Totales (USD)	Utilidad (USD)	Margen Promedio	Unidades Vendidas	Participación
Online	$44,965,657	$19,552,538	46.4%	939,093	37.4%
Outlet	$39,536,618	$14,913,301	39.5%	849,778	32.9%
In-store	$35,664,375	$12,759,129	35.6%	689,990	29.7%

  datadidas %>%
  group_by(metodo_venta) %>%
  summarise(margen_prom = mean(margen_operativo)) %>%
  mutate(metodo = reorder(metodo_venta, margen_prom)) %>%
  ggplot(aes(x = metodo, y = margen_prom, fill = margen_prom)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            hjust = -0.1, size = 3.0) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(),
                     expand = expansion(mult = c(0, 0.2))) +
  scale_fill_gradient(low = "#f9e4b7", high = "#e67e22", guide = "none") +
  labs(title = "Figura 6. Margen Operativo Promedio por Método de Venta",
       x     = "Método de Venta",
       y     = "Margen Operativo Promedio") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major.y = element_blank())

ventas_producto <- datadidas %>%
  group_by(`Producto` = producto) %>%
  summarise(
    `Ventas Totales (USD)` = sum(ventas_total, na.rm = TRUE),
    `Utilidad (USD)`       = sum(utilidad_operativa, na.rm = TRUE),
    `Margen Promedio`      = mean(margen_operativo, na.rm = TRUE),
    `Unidades Vendidas`    = sum(unidades_vendidas, na.rm = TRUE),
  ) %>%
  arrange(desc(`Ventas Totales (USD)`)) %>%
  mutate(
    `Participación`        = `Ventas Totales (USD)` / sum(`Ventas Totales (USD)`),
    `Ventas Totales (USD)` = scales::dollar(`Ventas Totales (USD)`),
    `Utilidad (USD)`       = scales::dollar(`Utilidad (USD)`),
    `Margen Promedio`      = scales::percent(`Margen Promedio`, accuracy = 0.1),
    `Unidades Vendidas`    = scales::comma(`Unidades Vendidas`),
    `Participación`        = scales::percent(`Participación`, accuracy = 0.1)
  )

kable(ventas_producto,
      caption = "Resumen de Ventas por Producto",
      booktabs = TRUE,
      align = c("l", "r", "r", "r", "r", "r", "r")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  ) %>%
  column_spec(2, bold = TRUE)

Resumen de Ventas por Producto
Producto	Ventas Totales (USD)	Utilidad (USD)	Margen Promedio	Unidades Vendidas	Participación
Men’s Street Footwear	$27,680,769	$11,629,046	44.6%	593,320	23.0%
Women’s Apparel	$23,870,985	$9,685,221	44.1%	433,827	19.9%
Men’s Athletic Footwear	$20,577,180	$7,437,457	40.3%	435,526	17.1%
Women’s Street Footwear	$17,201,563	$6,494,017	41.0%	392,269	14.3%
Men’s Apparel	$16,520,632	$6,381,405	41.3%	306,683	13.7%
Women’s Athletic Footwear	$14,315,521	$5,597,822	42.4%	317,236	11.9%

  datadidas %>%
  group_by(producto) %>%
  summarise(margen_prom = mean(margen_operativo)) %>%
  mutate(Producto = reorder(producto, margen_prom)) %>%
  ggplot(aes(x = Producto, y = margen_prom, fill = margen_prom)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(margen_prom, accuracy = 0.1)),
            hjust = -0.1, size = 3.0) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(),
                     expand = expansion(mult = c(0, 0.2))) +
  scale_fill_gradient(low = "#d9d9d9", high = "#252525", guide = "none") +
  labs(title = "Figura 6. Margen Operativo Promedio por Producto",
       x     = "Producto",
       y     = "Margen Operativo Promedio") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major.y = element_blank())

Relación entre Ventas y Utilidad Operativa

Esta sección examina si un mayor volumen de ventas se traduce proporcionalmente en mayor utilidad, o si existen ineficiencias que limitan la rentabilidad.

library(ggplot2)
library(scales)

# 📊 Correlación
correlacion <- cor(datadidas$ventas_total, 
                   datadidas$utilidad_operativa, 
                   use = "complete.obs")

ggplot(datadidas, aes(x = ventas_total, y = utilidad_operativa)) +
  
  # 🔵 Puntos más suaves
  geom_point(alpha = 0.2, size = 1.2, color = "#2C3E50") +
  
  # 📈 Línea de tendencia
  geom_smooth(
    method = "lm",
    se = TRUE,
    color = "#27AE60",
    linetype = "dashed",
    linewidth = 1
  ) +
  
  # 🔥 Texto de correlación
  annotate(
    "text",
    x = Inf, y = -Inf,
    label = paste0("Correlación: ", round(correlacion, 3)),
    hjust = 1.1, vjust = -1.2,
    size = 4,
    fontface = "bold"
  ) +
  
  # 💲 Formato miles
  scale_x_continuous(labels = dollar_format(scale = 1e-3, suffix = "K")) +
  scale_y_continuous(labels = dollar_format(scale = 1e-3, suffix = "K")) +
  
  labs(
    title = "Figura 5. Ventas Totales vs. Utilidad Operativa",
    subtitle = "Relación lineal entre ventas y rentabilidad",
    x = "Ventas Totales (Miles USD)",
    y = "Utilidad Operativa (Miles USD)"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

cor_val <- cor(datadidas$ventas_total, datadidas$utilidad_operativa)

data.frame(
  Métrica = "Correlación de Pearson (Ventas vs. Utilidad)",
  Valor   = round(cor_val, 3)
) %>%
  kable(caption = "Correlación entre Ventas Totales y Utilidad Operativa",
        booktabs = TRUE,
        align = c("l", "r")) %>%
  kable_styling(latex_options = c("striped", "hold_position"),
                font_size = 11)

Correlación entre Ventas Totales y Utilidad Operativa
Métrica	Valor
Correlación de Pearson (Ventas vs. Utilidad)	0.935

Relación entre variables clave

library(ggplot2)
library(plotly)
library(scales)

# 📊 Correlación
cor_precio <- cor(datadidas$precio_unidad,
                  datadidas$unidades_vendidas,
                  use = "complete.obs")

# 📈 Gráfico interactivo
ggplotly(
  ggplot(datadidas, aes(x = precio_unidad, y = unidades_vendidas)) +
    
    # 🔵 Puntos
    geom_point(color = "#2C7FB8", alpha = 0.5, size = 2) +
    
    # 📈 Tendencia
    geom_smooth(
      method = "lm",
      color = "#D7191C",
      linetype = "dashed",
      linewidth = 1
    ) +
    
    # 📊 Formato de ejes consistente
    scale_x_continuous(labels = dollar_format()) +
    scale_y_continuous(labels = comma_format()) +
    
    labs(
      title = paste0(
        "Figura X. Relación entre Precio por Unidad y Unidades Vendidas\n",
        "Coeficiente de correlación: ", round(cor_precio, 3)
      ),
      subtitle = "Análisis del comportamiento de la demanda",
      x = "Precio por unidad (USD)",
      y = "Unidades vendidas",
      caption = "Fuente: Elaboración propia con base en datos de Adidas"
    ) +
    
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
      plot.subtitle = element_text(size = 11, hjust = 0.5),
      axis.title = element_text(size = 11),
      axis.text = element_text(size = 10),
      panel.grid.minor = element_blank()
    ),
  
  tooltip = c("x", "y")
)

¿El precio influye en la rentabilidad?

library(ggplot2)
library(plotly)
library(scales)

# 📊 Correlación
cor_precio_margen <- cor(datadidas$precio_unidad,
                         datadidas$margen_operativo,
                         use = "complete.obs")

# 📈 Gráfico
ggplotly(
  ggplot(datadidas, aes(x = precio_unidad, y = margen_operativo)) +
    
    geom_point(color = "#2C7FB8", alpha = 0.5, size = 2) +
    
    geom_smooth(
      method = "lm",
      color = "#D7191C",
      linetype = "dashed",
      linewidth = 1
    ) +
    
    scale_x_continuous(labels = dollar_format()) +
    scale_y_continuous(labels = percent_format()) +
    
    labs(
      title = paste0(
        "Figura X. Relación entre Precio por Unidad y Margen Operativo\n",
        "Coeficiente de correlación: ", round(cor_precio_margen, 3)
      ),
      subtitle = "Análisis del impacto del precio en la rentabilidad",
      x = "Precio por unidad (USD)",
      y = "Margen operativo (%)",
      caption = "Fuente: Elaboración propia con base en datos de Adidas"
    ) +
    
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
      plot.subtitle = element_text(size = 11, hjust = 0.5),
      panel.grid.minor = element_blank()
    ),
  
  tooltip = c("x", "y")
)

• ¿Qué variables parecen estar más asociadas con la utilidad operativa?

library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)

# 📊 Matriz de correlación
datos_corr <- datadidas %>%
  select(precio_unidad, unidades_vendidas, ventas_total, utilidad_operativa, margen_operativo)

cor_mat <- round(cor(datos_corr, use = "complete.obs"), 3)

# 🔄 Convertir a formato largo
cor_long <- cor_mat %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  pivot_longer(-var1, names_to = "var2", values_to = "cor") %>%
  
  # 🔥 Filtrar solo triángulo inferior
  filter(as.numeric(factor(var1)) > as.numeric(factor(var2)))

# 🎨 Heatmap estilo dashboard
ggplot(cor_long, aes(x = var2, y = var1, fill = cor)) +
  
  geom_tile(color = "white") +
  
  # 🔢 Valores dentro
  geom_text(aes(label = round(cor, 2)), size = 4, fontface = "bold") +
  
  # 🎨 Escala divergente (ROJO - BLANCO - VERDE)
  scale_fill_gradient2(
    low = "#D73027",   # negativo
    mid = "white",
    high = "#1A9850",  # positivo
    midpoint = 0,
    limits = c(-1, 1),
    name = "Correlación"
  ) +
  
  labs(
    title = "Matriz de correlación entre variables clave",
    subtitle = "Relaciones entre precio, volumen y rentabilidad",
    x = NULL,
    y = NULL
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold"),
    legend.position = "right"
  )

Caso 1

Daniel Medina Galofre

2026-04-02