# Load the required packages
library(readr) # Read CSV files
library(dplyr) # Data manipulation
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # Data visualization
## Warning: package 'ggplot2' was built under R version 4.4.2
library(skimr) # Quick exploratory analysis
## Warning: package 'skimr' was built under R version 4.4.2
library(readxl) # Read Excel files
library(gridExtra) # Arrange several plots on one page
##
## Adjuntando el paquete: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(moments) # Skewness and kurtosis
library(kableExtra) # Tables for R Markdown
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Read the Nike 2024 sales workbook (note: absolute, machine-specific path)
nike_sales_2024 <- read_excel(
  path = "D:/Usuario windows/Downloads/nike_sales_2024.xlsx"
)
# Show the structure of the loaded data: column types and first values
str(object = nike_sales_2024)
## tibble [1,000 × 10] (S3: tbl_df/tbl/data.frame)
## $ Month : chr [1:1000] "November" "January" "October" "December" ...
## $ Region : chr [1:1000] "India" "India" "India" "Greater China" ...
## $ Main_Category : chr [1:1000] "Equipment" "Equipment" "Apparel" "Footwear" ...
## $ Sub_Category : chr [1:1000] "Bags" "Accessories" "Tops" "Cricket" ...
## $ Product_Line : chr [1:1000] "Gym Sack" "Hats" "Tech Fleece" "Vapor Cricket" ...
## $ Price_Tier : chr [1:1000] "Budget" "Budget" "Mid-Range" "Premium" ...
## $ Units_Sold : num [1:1000] 48356 9842 25079 41404 33569 ...
## $ Revenue_USD : num [1:1000] 14506800 2066820 1755530 8694840 5371040 ...
## $ Online_Sales_Percentage: num [1:1000] 73 50 90 58 53 73 50 55 78 86 ...
## $ Retail_Price : num [1:1000] 300 210 70 210 160 140 230 150 150 230 ...
# Per-column summary of the dataset
summary(object = nike_sales_2024)
## Month Region Main_Category Sub_Category
## Length:1000 Length:1000 Length:1000 Length:1000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Product_Line Price_Tier Units_Sold Revenue_USD
## Length:1000 Length:1000 Min. : 5028 Min. : 287400
## Class :character Class :character 1st Qu.:17554 1st Qu.: 2344675
## Mode :character Mode :character Median :28685 Median : 4328020
## Mean :28499 Mean : 5039576
## 3rd Qu.:40026 3rd Qu.: 7264942
## Max. :49992 Max. :14864700
## Online_Sales_Percentage Retail_Price
## Min. :50.00 Min. : 50.0
## 1st Qu.:60.00 1st Qu.:110.0
## Median :71.00 Median :180.0
## Mean :70.04 Mean :176.3
## 3rd Qu.:80.00 3rd Qu.:240.0
## Max. :90.00 Max. :300.0
Identificar tendencias y patrones de crecimiento: Analizar la evolución de las ventas anuales de Nike para detectar patrones de crecimiento, estacionalidad y posibles fluctuaciones en el rendimiento financiero a lo largo del tiempo.
# Variable operationalization table: one row per dataset column with its
# nature, measurement scale and the statistics/plots suggested for it.

# Numeric columns are quantitative, everything else qualitative.
# vapply() guarantees a logical vector whatever the number of columns.
tipo_variable <- ifelse(
  vapply(nike_sales_2024, is.numeric, logical(1)),
  "Cuantitativa", "Cualitativa"
)
# Qualitative columns listed here are ordinal; the remaining ones are nominal.
variables_ordinales <- c("Price_Tier")
# Every quantitative column is labelled as ratio scale.
escala_medida <- ifelse(
  tipo_variable == "Cuantitativa", "Razón",
  ifelse(names(nike_sales_2024) %in% variables_ordinales, "Ordinal", "Nominal")
)
frecuencias <- ifelse(tipo_variable == "Cualitativa", "Sí", "Agrupadas")
medidas_localizacion <- ifelse(
  tipo_variable == "Cuantitativa", "Media, Mediana, Moda", "Moda"
)
medidas_dispersion <- ifelse(
  tipo_variable == "Cuantitativa",
  "Desviación estándar, Rango intercuartílico", "N/A"
)
medidas_distribucion <- ifelse(
  tipo_variable == "Cuantitativa", "Asimetría, Curtosis", "N/A"
)
graficos <- ifelse(
  tipo_variable == "Cuantitativa", "Histograma, Boxplot",
  ifelse(escala_medida == "Ordinal", "Diagrama de barras ordenado", "Gráfico de barras")
)
tabla_operacionalizacion <- data.frame(
  Variable = names(nike_sales_2024),
  Naturaleza_Variable = tipo_variable,
  Escala_de_Medidas = escala_medida,
  Frecuencias = frecuencias,
  Medidas_Localizacion = medidas_localizacion,
  Medidas_Dispersion = medidas_dispersion,
  Medidas_Distribucion = medidas_distribucion,
  Graficos_Sugeridos = graficos,
  # The vectors above are named after the dataset columns. Without an explicit
  # row.names = NULL those names become row names, kbl() renders them as a
  # duplicate first column, and the column_spec() indices below are off by one.
  row.names = NULL,
  stringsAsFactors = FALSE
)
# Render the table: highlighted first column (variable name), grey for the rest.
tabla_operacionalizacion %>%
  kbl(caption = "Tabla de Operacionalización de Variables", escape = FALSE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE, color = "white", background = "#4CAF50") %>%
  column_spec(2:ncol(tabla_operacionalizacion), background = "#f0f0f0")
Variable | Naturaleza_Variable | Escala_de_Medidas | Frecuencias | Medidas_Localizacion | Medidas_Dispersion | Medidas_Distribucion | Graficos_Sugeridos |
---|---|---|---|---|---|---|---|
Month | Cualitativa | Nominal | Sí | Moda | N/A | N/A | Gráfico de barras |
Region | Cualitativa | Nominal | Sí | Moda | N/A | N/A | Gráfico de barras |
Main_Category | Cualitativa | Nominal | Sí | Moda | N/A | N/A | Gráfico de barras |
Sub_Category | Cualitativa | Nominal | Sí | Moda | N/A | N/A | Gráfico de barras |
Product_Line | Cualitativa | Nominal | Sí | Moda | N/A | N/A | Gráfico de barras |
Price_Tier | Cualitativa | Ordinal | Sí | Moda | N/A | N/A | Diagrama de barras ordenado |
Units_Sold | Cuantitativa | Razón | Agrupadas | Media, Mediana, Moda | Desviación estándar, Rango intercuartílico | Asimetría, Curtosis | Histograma, Boxplot |
Revenue_USD | Cuantitativa | Razón | Agrupadas | Media, Mediana, Moda | Desviación estándar, Rango intercuartílico | Asimetría, Curtosis | Histograma, Boxplot |
Online_Sales_Percentage | Cuantitativa | Razón | Agrupadas | Media, Mediana, Moda | Desviación estándar, Rango intercuartílico | Asimetría, Curtosis | Histograma, Boxplot |
Retail_Price | Cuantitativa | Razón | Agrupadas | Media, Mediana, Moda | Desviación estándar, Rango intercuartílico | Asimetría, Curtosis | Histograma, Boxplot |
# Print a heading followed by the structure of the dataset
cat("\nInformación general del dataset:")
##
## Información general del dataset:
# str() writes its report itself and returns NULL, so wrapping it in print()
# only appended a stray "NULL" line to the output; call it directly.
str(nike_sales_2024)
## tibble [1,000 × 10] (S3: tbl_df/tbl/data.frame)
## $ Month : chr [1:1000] "November" "January" "October" "December" ...
## $ Region : chr [1:1000] "India" "India" "India" "Greater China" ...
## $ Main_Category : chr [1:1000] "Equipment" "Equipment" "Apparel" "Footwear" ...
## $ Sub_Category : chr [1:1000] "Bags" "Accessories" "Tops" "Cricket" ...
## $ Product_Line : chr [1:1000] "Gym Sack" "Hats" "Tech Fleece" "Vapor Cricket" ...
## $ Price_Tier : chr [1:1000] "Budget" "Budget" "Mid-Range" "Premium" ...
## $ Units_Sold : num [1:1000] 48356 9842 25079 41404 33569 ...
## $ Revenue_USD : num [1:1000] 14506800 2066820 1755530 8694840 5371040 ...
## $ Online_Sales_Percentage: num [1:1000] 73 50 90 58 53 73 50 55 78 86 ...
## $ Retail_Price : num [1:1000] 300 210 70 210 160 140 230 150 150 230 ...
# Print a heading followed by the per-column summary statistics
cat("\nResumen estadístico:")
##
## Resumen estadístico:
print(x = summary(object = nike_sales_2024))
## Month Region Main_Category Sub_Category
## Length:1000 Length:1000 Length:1000 Length:1000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Product_Line Price_Tier Units_Sold Revenue_USD
## Length:1000 Length:1000 Min. : 5028 Min. : 287400
## Class :character Class :character 1st Qu.:17554 1st Qu.: 2344675
## Mode :character Mode :character Median :28685 Median : 4328020
## Mean :28499 Mean : 5039576
## 3rd Qu.:40026 3rd Qu.: 7264942
## Max. :49992 Max. :14864700
## Online_Sales_Percentage Retail_Price
## Min. :50.00 Min. : 50.0
## 1st Qu.:60.00 1st Qu.:110.0
## Median :71.00 Median :180.0
## Mean :70.04 Mean :176.3
## 3rd Qu.:80.00 3rd Qu.:240.0
## Max. :90.00 Max. :300.0
# Split the dataset by column type: numeric columns on one side,
# character/factor columns on the other
numeric_vars <- select(nike_sales_2024, where(is.numeric))
categorical_vars <- select(
  nike_sales_2024,
  where(is.character) | where(is.factor)
)
# --- 1. Histograms of the numeric variables ---
# Build one 30-bin histogram per numeric column and keep them in a list.
histograms <- lapply(
  X = names(numeric_vars),
  FUN = function(var) {
    titulo <- paste("Histograma de", var)
    ggplot(data = nike_sales_2024, mapping = aes(x = .data[[var]])) +
      geom_histogram(bins = 30, fill = "blue", alpha = 0.6, color = "black") +
      theme_minimal() +
      ggtitle(titulo)
  }
)
# Draw all histograms on a two-column grid (nothing to draw without numeric columns).
if (length(histograms) > 0) {
  grid.arrange(grobs = histograms, ncol = 2)
}
# --- 2. Boxplots to spot outliers ---
# Build one boxplot per numeric column; outlier points are drawn in black.
boxplots <- lapply(
  X = names(numeric_vars),
  FUN = function(var) {
    titulo <- paste("Boxplot de", var)
    ggplot(data = nike_sales_2024, mapping = aes(y = .data[[var]])) +
      geom_boxplot(fill = "red", alpha = 0.6, outlier.color = "black") +
      theme_minimal() +
      ggtitle(titulo)
  }
)
# Draw all boxplots on a two-column grid (nothing to draw without numeric columns).
if (length(boxplots) > 0) {
  grid.arrange(grobs = boxplots, ncol = 2)
}
# --- 3. Scatter plots for every pair of numeric variables ---
# At least two numeric columns are needed to form a pair.
if (ncol(numeric_vars) >= 2) {
  scatter_plots <- lapply(
    # Every unordered pair of numeric column names, as a list of length-2 vectors
    X = combn(names(numeric_vars), 2, simplify = FALSE),
    FUN = function(vars) {
      titulo <- paste("Dispersión entre", vars[1], "y", vars[2])
      ggplot(
        data = nike_sales_2024,
        mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])
      ) +
        geom_point(alpha = 0.6, color = "darkgreen") +
        theme_minimal() +
        ggtitle(titulo)
    }
  )
  # Draw all scatter plots on a two-column grid
  if (length(scatter_plots) > 0) {
    grid.arrange(grobs = scatter_plots, ncol = 2)
  }
}
# --- 4. Bar charts for the categorical variables ---
# Skipped entirely when the dataset has no character/factor columns.
if (ncol(categorical_vars) > 0) {
  barplots <- lapply(
    X = names(categorical_vars),
    FUN = function(var) {
      titulo <- paste("Distribución de", var)
      # Tilt the x-axis labels by 45 degrees; applied after theme_minimal()
      # so the complete theme does not reset it
      etiquetas_inclinadas <- theme(
        axis.text.x = element_text(angle = 45, hjust = 1)
      )
      ggplot(data = nike_sales_2024, mapping = aes(x = .data[[var]])) +
        geom_bar(fill = "purple", alpha = 0.6) +
        theme_minimal() +
        ggtitle(titulo) +
        etiquetas_inclinadas
    }
  )
  # Draw all bar charts on a two-column grid
  if (length(barplots) > 0) {
    grid.arrange(grobs = barplots, ncol = 2)
  }
}
# --- 5. Normality test (Shapiro-Wilk) ---
# p-value of the Shapiro-Wilk test for every numeric column.
# shapiro.test() stops with an error for fewer than 3 or more than 5000
# non-missing values and for (near-)constant data; such columns get NA here
# so one unsuitable column does not abort the whole analysis.
# vapply() keeps the result a named numeric vector for any number of columns.
normality_results <- vapply(
  numeric_vars,
  function(x) {
    x <- x[!is.na(x)]
    n_obs <- length(x)
    if (n_obs < 3 || n_obs > 5000 || diff(range(x)) < 1e-10) {
      return(NA_real_)
    }
    shapiro.test(x)$p.value
  },
  numeric(1)
)
# Collect the results in a data frame: normality is rejected at the 5% level
normality_df <- data.frame(
  Variable = names(numeric_vars),
  P_Value = normality_results,
  # An NA p-value (test not applicable) yields an NA label
  Normality = ifelse(normality_results > 0.05, "Normal", "No Normal")
)
print(normality_df)
## Variable P_Value Normality
## Units_Sold Units_Sold 3.559201e-17 No Normal
## Revenue_USD Revenue_USD 7.832045e-20 No Normal
## Online_Sales_Percentage Online_Sales_Percentage 8.222526e-17 No Normal
## Retail_Price Retail_Price 1.708022e-17 No Normal
# --- 6. Skewness analysis ---
# Skewness coefficient of every numeric column. vapply() guarantees a numeric
# vector; sapply() would return list() when there are no numeric columns and
# the mutate() below would then fail.
skewness_results <- vapply(numeric_vars, skewness, numeric(1))
# Collect the results in a data frame
skewness_df <- data.frame(
  Variable = names(numeric_vars),
  Skewness = skewness_results
)
# Label each coefficient. case_when() uses the first matching condition, so
# each branch only needs its lower bound; NA coefficients stay NA.
skewness_df <- skewness_df %>%
  mutate(Interpretation = case_when(
    Skewness > 1 ~ "Sesgo positivo alto (asimétrica a la derecha)",
    Skewness > 0.5 ~ "Sesgo positivo moderado",
    Skewness >= -0.5 ~ "Simétrica o aproximadamente normal",
    Skewness >= -1 ~ "Sesgo negativo moderado",
    Skewness < -1 ~ "Sesgo negativo alto (asimétrica a la izquierda)"
  ))
print(skewness_df)
## Variable Skewness
## Units_Sold Units_Sold -0.063246745
## Revenue_USD Revenue_USD 0.721613314
## Online_Sales_Percentage Online_Sales_Percentage 0.009362956
## Retail_Price Retail_Price -0.015083579
## Interpretation
## Units_Sold Simétrica o aproximadamente normal
## Revenue_USD Sesgo positivo moderado
## Online_Sales_Percentage Simétrica o aproximadamente normal
## Retail_Price Simétrica o aproximadamente normal
# 5. Comparison between groups
# Count the rows per month to see which months are available
table(nike_sales_2024[["Month"]])
##
## April August December February January July June March
## 68 75 96 96 78 90 91 73
## May November October September
## 85 72 85 91
# Compare every numeric variable between two months: one boxplot per variable
# plus a Welch (unequal variances) two-sample t-test on the group means.
if ("Month" %in% names(nike_sales_2024) && length(unique(nike_sales_2024$Month)) > 1) {
  # Take the first two months in order of appearance in the data
  meses <- unique(nike_sales_2024$Month)[1:2]
  nike_sales_2024_filtered <- nike_sales_2024 %>% filter(Month %in% meses)
  # vapply() always yields a logical mask, whatever the number of columns
  for (var in names(nike_sales_2024_filtered)[vapply(nike_sales_2024_filtered, is.numeric, logical(1))]) {
    p <- ggplot(nike_sales_2024_filtered, aes(x = as.factor(Month), y = .data[[var]])) +
      geom_boxplot(fill = "lightblue") +
      ggtitle(paste("Comparación de", var, "entre los meses", meses[1], "y", meses[2]))
    print(p)
    # Difference-of-means test.
    # Extract the column with [[ ]]: pull(var) is resolved against the column
    # names first, so it would pick a column literally called "var" if present.
    group1 <- nike_sales_2024_filtered[[var]][which(nike_sales_2024_filtered$Month == meses[1])]
    group2 <- nike_sales_2024_filtered[[var]][which(nike_sales_2024_filtered$Month == meses[2])]
    # t.test() stops on too few observations or constant data; report that
    # for this variable and continue with the next one instead of aborting.
    t_test <- tryCatch(
      t.test(group1, group2, var.equal = FALSE),
      error = function(e) {
        cat("\nComparación de", var, "entre", meses[1], "y", meses[2], ": prueba t no aplicable (", conditionMessage(e), ")\n")
        NULL
      }
    )
    if (is.null(t_test)) {
      next
    }
    cat("\nComparación de", var, "entre", meses[1], "y", meses[2], ": p-value =", round(t_test$p.value, 4), "\n")
    if (t_test$p.value < 0.05) {
      cat("Diferencia estadísticamente significativa.\n")
    } else {
      cat("No hay diferencia estadísticamente significativa.\n")
    }
  }
} else {
  cat("No hay suficientes meses en la base de nike_sales_2024 para realizar la comparación.\n")
}
##
## Comparación de Units_Sold entre November y January : p-value = 0.8697
## No hay diferencia estadísticamente significativa.
##
## Comparación de Revenue_USD entre November y January : p-value = 0.2074
## No hay diferencia estadísticamente significativa.
##
## Comparación de Online_Sales_Percentage entre November y January : p-value = 0.7947
## No hay diferencia estadísticamente significativa.
##
## Comparación de Retail_Price entre November y January : p-value = 0.3531
## No hay diferencia estadísticamente significativa.
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk: summary of the `cars` dataset
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.