Carga de Datos
# Load the online-sales dataset and convert every character column to a
# factor so that categorical variables are summarised by level.
# mutate(across(where(...))) replaces the superseded mutate_if() scoped verb.
datos <- read.csv("datos_negocios_adicional_1_ventas_online.csv") %>%
  mutate(across(where(is.character), as.factor))
# Inspect the structure (column types) and a per-column summary.
str(datos)
## 'data.frame': 100 obs. of 4 variables:
## $ Plataforma : Factor w/ 4 levels "Amazon","eBay",..: 4 3 4 1 3 1 4 3 3 3 ...
## $ Ingresos : num 12396 75450 42504 7550 11774 ...
## $ Pedidos_Realizados: int 787 730 998 419 590 935 204 248 217 803 ...
## $ Devoluciones : int 55 4 148 12 14 9 30 42 39 88 ...
summary(datos)
## Plataforma Ingresos Pedidos_Realizados Devoluciones
## Amazon :29 Min. : 5281 Min. : 12.0 Min. : 0.00
## eBay :16 1st Qu.:22764 1st Qu.:251.0 1st Qu.: 12.00
## MercadoLibre:29 Median :48572 Median :479.5 Median : 32.00
## Shopify :26 Mean :48942 Mean :490.6 Mean : 41.63
## 3rd Qu.:69834 3rd Qu.:709.5 3rd Qu.: 64.25
## Max. :98912 Max. :998.0 Max. :148.00
Análisis Descriptivo
# Summary statistics for the numeric variables.
# select(where(...)) replaces the superseded select_if() scoped verb.
datos %>%
  select(where(is.numeric)) %>%
  summary()
## Ingresos Pedidos_Realizados Devoluciones
## Min. : 5281 Min. : 12.0 Min. : 0.00
## 1st Qu.:22764 1st Qu.:251.0 1st Qu.: 12.00
## Median :48572 Median :479.5 Median : 32.00
## Mean :48942 Mean :490.6 Mean : 41.63
## 3rd Qu.:69834 3rd Qu.:709.5 3rd Qu.: 64.25
## Max. :98912 Max. :998.0 Max. :148.00
# Frequency table for each categorical (factor) variable; map() returns a
# list with one table per factor column.
datos %>%
  select(where(is.factor)) %>%
  map(table)
## $Plataforma
##
## Amazon eBay MercadoLibre Shopify
## 29 16 29 26
# Histograms: reshape the numeric columns to long format and draw one panel
# per variable, each with its own axis scales (scales = "free").
# select(where(...)) replaces the superseded select_if() scoped verb.
datos %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "valor"
  ) %>%
  ggplot(aes(x = valor)) +
  facet_wrap(~variable, scales = "free") +
  geom_histogram(bins = 30, fill = "steelblue", color = "white")

# Bar charts for the categorical variables: reshape the factor columns to
# long format and draw one panel of level counts per variable.
# select(where(...)) replaces the superseded select_if() scoped verb.
datos %>%
  select(where(is.factor)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "valor"
  ) %>%
  ggplot(aes(x = valor)) +
  facet_wrap(~variable, scales = "free") +
  geom_bar(fill = "coral") +
  # Tilt the x-axis labels so long level names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Boxplots: one horizontal box per numeric variable. All variables share a
# single value axis (no faceting), so they are drawn on a common scale.
# select(where(...)) replaces the superseded select_if() scoped verb.
datos %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "valor"
  ) %>%
  ggplot(aes(x = variable, y = valor)) +
  geom_boxplot(fill = "lightgreen") +
  coord_flip()

Estimaciones Estadísticas
# Numeric variable under study for the estimates and tests that follow.
var_numerica <- datos$Ingresos
# Mean and 95% confidence interval.
# The standard error is built from the sample standard deviation, so the
# critical value is taken from the t distribution with n - 1 degrees of
# freedom instead of the normal approximation 1.96. This is the same
# interval that t.test() reports for the variable.
n_media <- sum(!is.na(var_numerica))
media <- mean(var_numerica, na.rm = TRUE)
error <- sd(var_numerica, na.rm = TRUE) / sqrt(n_media)
t_crit <- qt(0.975, df = n_media - 1)
ic_media <- media + c(-1, 1) * t_crit * error
ic_media
## [1] 43505.52 54378.60
# Proportion of observations on the Amazon platform and its 95% Wald
# (normal-approximation) confidence interval.
prop_table <- prop.table(table(datos$Plataforma))
n <- sum(!is.na(datos$Plataforma))
# Pick the level by name with [[ ]] rather than by position: the result does
# not depend on the factor level order, and it is an unnamed scalar, so the
# interval below prints without a misleading repeated "Amazon" label.
p <- prop_table[["Amazon"]]
error_p <- sqrt(p * (1 - p) / n)
ic_prop <- c(p - 1.96 * error_p, p + 1.96 * error_p)
ic_prop
## [1] 0.2010626 0.3789374
# 95% confidence interval for the variance of var_numerica, built from the
# chi-squared quantiles with n - 1 degrees of freedom.
n_var <- sum(!is.na(var_numerica))
s2 <- var(var_numerica, na.rm = TRUE)
gl <- n_var - 1
# Dividing by the upper quantile gives the lower limit, and vice versa.
ic_var <- gl * s2 / qchisq(c(0.975, 0.025), df = gl)
ic_var
## [1] 578712669 1013063488
Pruebas de Hipótesis
# Hypothesis test for the mean: one-sample t-test of H0: mu = 10000 against
# a two-sided alternative (the t.test() default) on var_numerica.
t.test(var_numerica, mu = 10000)
##
## One Sample t-test
##
## data: var_numerica
## t = 14.213, df = 99, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 10000
## 95 percent confidence interval:
## 43505.52 54378.60
## sample estimates:
## mean of x
## 48942.06
# Hypothesis test for a proportion: one-sample test of H0: p = 0.5 against a
# two-sided alternative, with continuity correction (the prop.test() default).
# round(p * n) converts the sample proportion p back into the count of
# successes that prop.test() expects as x.
prop.test(x = round(p*n), n = n, p = 0.5)
##
## 1-sample proportions test with continuity correction
##
## data: round(p * n) out of n, null probability 0.5
## X-squared = 16.81, df = 1, p-value = 4.132e-05
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.2057664 0.3906248
## sample estimates:
## p
## 0.29
# Hypothesis test for the variance: chi-squared test of H0: sigma^2 = sigma2_0
# against a two-sided alternative.
sigma2_0 <- 1000000
chi_valor <- (n_var - 1) * s2 / sigma2_0
# Two-sided p-value: twice the smaller of the two tail probabilities. The
# upper tail is requested with lower.tail = FALSE instead of 1 - pchisq(),
# which loses precision when the lower tail is very close to 1.
p_valor <- 2 * min(
  pchisq(chi_valor, df = n_var - 1),
  pchisq(chi_valor, df = n_var - 1, lower.tail = FALSE)
)
p_valor
## [1] 0
# Repeat of the key tests, now comparing two groups.
# Welch two-sample t-test of mean Ingresos between the levels of Plataforma.
# NOTE(review): datos_dos is not defined in the visible part of this file;
# the output below suggests it holds only the Amazon and MercadoLibre
# groups — confirm where and how it is built.
t.test(Ingresos ~ Plataforma, data = datos_dos)
##
## Welch Two Sample t-test
##
## data: Ingresos by Plataforma
## t = -1.9096, df = 55.889, p-value = 0.06132
## alternative hypothesis: true difference in means between group Amazon and group MercadoLibre is not equal to 0
## 95 percent confidence interval:
## -28715.3929 687.7632
## sample estimates:
## mean in group Amazon mean in group MercadoLibre
## 39110.96 53124.78
# Two-sample test for equality of proportions (with continuity correction).
# NOTE(review): tabla_prop is not defined in the visible part of this file —
# confirm how the table is built and which event it counts. The warning below
# says the chi-squared approximation may be incorrect, presumably because the
# counts are small; an exact test may be more appropriate — verify.
prop.test(tabla_prop)
## Warning in prop.test(tabla_prop): Chi-squared approximation may be incorrect
##
## 2-sample test for equality of proportions with continuity correction
##
## data: tabla_prop
## X-squared = 0, df = 1, p-value = 1
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.1136470 0.1826125
## sample estimates:
## prop 1 prop 2
## 0.06896552 0.03448276
# F test comparing the variances of Ingresos between the two levels of
# Plataforma (H0: ratio of variances = 1, two-sided).
# NOTE(review): datos_dos is not defined in the visible part of this file —
# confirm where it is created.
var.test(Ingresos ~ Plataforma, data = datos_dos)
##
## F test to compare two variances
##
## data: Ingresos by Plataforma
## F = 0.91475, num df = 28, denom df = 28, p-value = 0.8153
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4294754 1.9483484
## sample estimates:
## ratio of variances
## 0.9147501