Estadísticos de un conjunto de tallas de camiseta

# Restore the objects stored in "df.rds" into the workspace (the preview that
# follows shows that `df` is among them) and display its first six rows.
load(file = "df.rds")
head(x = df, n = 6)
##   Número de prendas    Prenda Talla Precio en USD Peso en kg
## 1                 1  Camiseta     M        142.14      2.029
## 2                 3 Camisilla     S         90.98      1.363
## 3                 1 Camisilla     S         43.57      1.432
## 4                 2  Camiseta     S        150.57      1.594
## 5                 1 Camisilla     M         42.53      1.790
## 6                 3 Camisilla     L         82.90      2.182
##   Porcentaje de descuento Medio de pago Hora de pago
## 1                       2       Tarjeta        15:15
## 2                      23       Tarjeta        22:35
## 3                       0       Tarjeta        14:06
## 4                       7       Tarjeta        19:47
## 5                       0      Efectivo        11:59
## 6                      16       Tarjeta        11:01
# Classification of every variable in the dataset: measurement scale,
# qualitative vs. quantitative, and discrete vs. continuous.
# - Variable labels match the dataset column names.
# - A count of garments is a ratio-scale ("Racional") quantity, not ordinal.
# - Discrete/continuous only applies to quantitative variables, so the
#   qualitative ones are marked "No aplica".
# NOTE(review): "Hora de pago" is kept as "Racional"; time of day is often
# classified as an interval scale instead -- confirm against the course notes.
tabla <- data.frame(
  Variable = c(
    "Número de prendas", "Prenda", "Talla", "Precio en USD", "Peso en kg",
    "Porcentaje de descuento", "Medio de pago", "Hora de pago"
  ),
  Escala_de_medicion = c(
    "Racional", "Nominal", "Ordinal", "Racional", "Racional",
    "Racional", "Nominal", "Racional"
  ),
  Cualitativa_Cuantitativa = c(
    "Cuantitativa", "Cualitativa", "Cualitativa", "Cuantitativa",
    "Cuantitativa", "Cuantitativa", "Cualitativa", "Cuantitativa"
  ),
  Discreta_Continua_No_aplica = c(
    "Discreta", "No aplica", "No aplica", "Continua",
    "Continua", "Discreta", "No aplica", "Continua"
  )
)
tabla
##                  Variable Escala_de_medicion Cualitativa_Cuantitativa
## 1       Número de prendas           Racional             Cuantitativa
## 2                  Prenda            Nominal              Cualitativa
## 3                   Talla            Ordinal              Cualitativa
## 4           Precio en USD           Racional             Cuantitativa
## 5              Peso en kg           Racional             Cuantitativa
## 6 Porcentaje de descuento           Racional             Cuantitativa
## 7           Medio de pago            Nominal              Cualitativa
## 8            Hora de pago           Racional             Cuantitativa
##   Discreta_Continua_No_aplica
## 1                    Discreta
## 2                   No aplica
## 3                   No aplica
## 4                    Continua
## 5                    Continua
## 6                    Discreta
## 7                   No aplica
## 8                    Continua

Análisis univariado de la variable Talla

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Univariate analysis of the Talla variable.
# Absolute frequencies, first as a base-R contingency table.
tab <- table(df[["Talla"]])
tab
## 
##    L    M    S   XL   XS  XXL  XXS 
##  636 1343 1545  184 1003   17  272
class(tab)
## [1] "table"
# The same counts as a data frame (columns `x` and `freq`).
frecs <- plyr::count(df[["Talla"]])
frecs
##     x freq
## 1   L  636
## 2   M 1343
## 3   S 1545
## 4  XL  184
## 5  XS 1003
## 6 XXL   17
## 7 XXS  272
# Relative frequency: each count divided by the total number of records.
names(frecs) <- c("values", "absfreq")
frecs[["relfreq"]] <- frecs[["absfreq"]] / sum(frecs[["absfreq"]])
frecs
##   values absfreq relfreq
## 1      L     636  0.1272
## 2      M    1343  0.2686
## 3      S    1545  0.3090
## 4     XL     184  0.0368
## 5     XS    1003  0.2006
## 6    XXL      17  0.0034
## 7    XXS     272  0.0544
# Absolute (h) and relative (f) frequency of every Talla value.
fco <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h))
fco
## # A tibble: 7 × 3
##   Talla     h      f
##   <chr> <int>  <dbl>
## 1 L       636 0.127 
## 2 M      1343 0.269 
## 3 S      1545 0.309 
## 4 XL      184 0.0368
## 5 XS     1003 0.201 
## 6 XXL      17 0.0034
## 7 XXS     272 0.0544
# Cumulative absolute (H) and cumulative relative (F) frequencies, accumulated
# in the row order printed above.
fco <- fco %>%
  mutate(H = cumsum(h), F = cumsum(f))
fco
## # A tibble: 7 × 5
##   Talla     h      f     H     F
##   <chr> <int>  <dbl> <int> <dbl>
## 1 L       636 0.127    636 0.127
## 2 M      1343 0.269   1979 0.396
## 3 S      1545 0.309   3524 0.705
## 4 XL      184 0.0368  3708 0.742
## 5 XS     1003 0.201   4711 0.942
## 6 XXL      17 0.0034  4728 0.946
## 7 XXS     272 0.0544  5000 1
# Rebuild the frequency table, then put the sizes in their natural order.
fco <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h))
fco
## # A tibble: 7 × 3
##   Talla     h      f
##   <chr> <int>  <dbl>
## 1 L       636 0.127 
## 2 M      1343 0.269 
## 3 S      1545 0.309 
## 4 XL      184 0.0368
## 5 XS     1003 0.201 
## 6 XXL      17 0.0034
## 7 XXS     272 0.0544
# Smallest to largest size; match() gives the row of each size in `fco`.
orden_tallas <- c("XXS", "XS", "S", "M", "L", "XL", "XXL")
fco <- fco[match(orden_tallas, fco$Talla), ]
fco
## # A tibble: 7 × 3
##   Talla     h      f
##   <chr> <int>  <dbl>
## 1 XXS     272 0.0544
## 2 XS     1003 0.201 
## 3 S      1545 0.309 
## 4 M      1343 0.269 
## 5 L       636 0.127 
## 6 XL      184 0.0368
## 7 XXL      17 0.0034
# Cumulative absolute (H) and relative (F) frequencies over the ordered sizes.
fco <- fco %>%
  mutate(H = cumsum(h), F = cumsum(f))
fco
## # A tibble: 7 × 5
##   Talla     h      f     H      F
##   <chr> <int>  <dbl> <int>  <dbl>
## 1 XXS     272 0.0544   272 0.0544
## 2 XS     1003 0.201   1275 0.255 
## 3 S      1545 0.309   2820 0.564 
## 4 M      1343 0.269   4163 0.833 
## 5 L       636 0.127   4799 0.960 
## 6 XL      184 0.0368  4983 0.997 
## 7 XXL      17 0.0034  5000 1
library(tidyr)
library(dplyr)
library(data.table)
## 
## Adjuntando el paquete: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(moments)
library(datasets)
library(readxl)
library(ggplot2)
library(stabledist)
library(scales)
# Copy the current frequency table under the name used in this section.
fTalla <- fco
# Mean of the per-size counts in column h.
mean(fTalla[["h"]])
## [1] 714.2857
# Mode: the size with the largest relative frequency in `frecs` ...
frecs[["values"]][which.max(frecs[["relfreq"]])]
## [1] "S"
# ... and the same result obtained with DescTools::Mode() on the raw column.
moda <- DescTools::Mode(df[["Talla"]])
moda
## [1] "S"
## attr(,"freq")
## [1] 1545
# Median of Talla, read from the cumulative relative frequency column F of
# `fco`: the first size (in the row order printed below) whose F reaches 0.5.
fco
## # A tibble: 7 × 5
##   Talla     h      f     H      F
##   <chr> <int>  <dbl> <int>  <dbl>
## 1 XXS     272 0.0544   272 0.0544
## 2 XS     1003 0.201   1275 0.255 
## 3 S      1545 0.309   2820 0.564 
## 4 M      1343 0.269   4163 0.833 
## 5 L       636 0.127   4799 0.960 
## 6 XL      184 0.0368  4983 0.997 
## 7 XXL      17 0.0034  5000 1
# The median is the 50th percentile, so the cut-off is 0.5.
# A dedicated name is used instead of `F`, which is R's shorthand for FALSE.
p_mediana <- 0.5
fco$Talla[fco$F >= p_mediana][1]
## [1] "S"
# Quartiles and 85th percentile of Talla.
# `fTalla` is built in group_by()'s default (alphabetical) row order and kept
# that way because the range/variance/skewness sections further down read it.
fTalla <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h)) %>%
  mutate(H = cumsum(h), F = cumsum(f))
fTalla
## # A tibble: 7 × 5
##   Talla     h      f     H     F
##   <chr> <int>  <dbl> <int> <dbl>
## 1 L       636 0.127    636 0.127
## 2 M      1343 0.269   1979 0.396
## 3 S      1545 0.309   3524 0.705
## 4 XL      184 0.0368  3708 0.742
## 5 XS     1003 0.201   4711 0.942
## 6 XXL      17 0.0034  4728 0.946
## 7 XXS     272 0.0544  5000 1
# Percentiles of an ordinal variable have to be read from frequencies
# accumulated in size order (XXS < XS < ... < XXL). The H and F columns of
# `fTalla` above are accumulated alphabetically, so a size-ordered copy with
# its own cumulative columns is used for the look-ups.
orden_tallas <- c("XXS", "XS", "S", "M", "L", "XL", "XXL")
fTalla_ord <- fTalla[match(orden_tallas, fTalla$Talla), c("Talla", "h", "f")] %>%
  mutate(H = cumsum(h), F = cumsum(f))
# Returns the first size whose cumulative relative frequency reaches `p`.
percentil_talla <- function(p) {
  fTalla_ord$Talla[fTalla_ord$F >= p][1]
}
# First quartile
percentil_talla(0.25)
## [1] "XS"
# Third quartile
percentil_talla(0.75)
## [1] "M"
# 85th percentile
percentil_talla(0.85)
## [1] "L"
# Range of the per-size counts: largest count minus smallest count.
diff(range(fTalla$h))
## [1] 1528
# Frequency table (absolute h, relative f) stored as `frecm`.
frecm <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h))
frecm
## # A tibble: 7 × 3
##   Talla     h      f
##   <chr> <int>  <dbl>
## 1 L       636 0.127 
## 2 M      1343 0.269 
## 3 S      1545 0.309 
## 4 XL      184 0.0368
## 5 XS     1003 0.201 
## 6 XXL      17 0.0034
## 7 XXS     272 0.0544
# Variance of the counts
var(fTalla[["h"]])
## [1] 356313.2
# Sample standard deviation of the counts
sd(fTalla[["h"]])
## [1] 596.9198
# Skewness coefficient of every column of fTalla from h through F.
summarise(fTalla, across(h:F, skewness))
## # A tibble: 1 × 4
##       h     f      H      F
##   <dbl> <dbl>  <dbl>  <dbl>
## 1 0.229 0.229 -0.783 -0.783
# Excess kurtosis of x: the value returned by kurtosis() minus 3.
exckurt <- function(x) {
  kurtosis(x) - 3
}
summarise(fTalla, across(h:F, exckurt))
## # A tibble: 1 × 4
##       h     f      H      F
##   <dbl> <dbl>  <dbl>  <dbl>
## 1 -1.48 -1.48 -0.757 -0.757
library(ggplot2)
library(dplyr)

# Summary table of Talla: one row per size with its absolute frequency, its
# share of the total as a percentage, and Talla stored as a factor whose
# levels run from the smallest to the largest size.
# NOTE(review): this assignment replaces whatever `df` held before.
tallas_ordenadas <- c("XXS", "XS", "S", "M", "L", "XL", "XXL")
df <- data.frame(
  Talla = tallas_ordenadas,
  Frecuencia = c(272, 1003, 1545, 1343, 636, 184, 17)
)
df[["Porcentaje"]] <- (df[["Frecuencia"]] / sum(df[["Frecuencia"]])) * 100
df[["Talla"]] <- factor(df[["Talla"]], levels = tallas_ordenadas)

Including Plots

You can also embed plots, for example:

  1. Análisis univariado de la variable Precio
# Grouped frequency table of the price: 21 classes with their absolute counts.
# Only the class labels and the absolute counts are typed in; the relative and
# cumulative columns are derived from the counts so they cannot drift apart.
frec_abs_precio <- c(
  6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
  495, 130, 75, 120, 319, 254, 83, 17, 6, 1
)
total_precio <- sum(frec_abs_precio)
Precio <- data.frame(
  Intervalo = c(
    "(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]",
    "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]",
    "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]",
    "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"
  ),
  Frecuencia_Absoluta = frec_abs_precio,
  Frecuencia_Relativa = frec_abs_precio / total_precio,
  Frec_Abs_Ac = cumsum(frec_abs_precio),
  # Dividing the cumulative count (rather than accumulating the fractions)
  # avoids building up rounding error.
  Frec_Rel_Ac = cumsum(frec_abs_precio) / total_precio
)
print(Precio)
##    Intervalo Frecuencia_Absoluta Frecuencia_Relativa Frec_Abs_Ac Frec_Rel_Ac
## 1     (4,17]                   6              0.0012           6      0.0012
## 2    (17,30]                   9              0.0018          15      0.0030
## 3    (30,43]                  51              0.0102          66      0.0132
## 4    (43,56]                 193              0.0386         259      0.0518
## 5    (56,69]                 586              0.1172         845      0.1690
## 6    (69,82]                 635              0.1270        1480      0.2960
## 7    (82,95]                 241              0.0482        1721      0.3442
## 8   (95,108]                  76              0.0152        1797      0.3594
## 9  (108,121]                 161              0.0322        1958      0.3916
## 10 (121,134]                 529              0.1058        2487      0.4974
## 11 (134,147]                1013              0.2026        3500      0.7000
## 12 (147,160]                 495              0.0990        3995      0.7990
## 13 (160,173]                 130              0.0260        4125      0.8250
## 14 (173,186]                  75              0.0150        4200      0.8400
## 15 (186,199]                 120              0.0240        4320      0.8640
## 16 (199,212]                 319              0.0638        4639      0.9278
## 17 (212,225]                 254              0.0508        4893      0.9786
## 18 (225,238]                  83              0.0166        4976      0.9952
## 19 (238,251]                  17              0.0034        4993      0.9986
## 20 (251,264]                   6              0.0012        4999      0.9998
## 21 (264,277]                   1              0.0002        5000      1.0000
# Descriptive statistics of the absolute-frequency column of `Precio`.
# NOTE(review): every figure below is computed on the 21 class counts, so it
# describes the counts per class rather than the prices themselves -- confirm
# that this is what the exercise asks for.
conteos <- Precio[["Frecuencia_Absoluta"]]
# Mean
media <- mean(conteos)
media
## [1] 238.0952
# Median
mediana <- median(conteos)
mediana
## [1] 130
# First quartile
q1 <- quantile(conteos, probs = 0.25)
q1
## 25% 
##  51
# Third quartile
q3 <- quantile(conteos, probs = 0.75)
q3
## 75% 
## 319
# 85th percentile
p85 <- quantile(conteos, probs = 0.85)
p85
## 85% 
## 529
# Range: largest minus smallest value
rango <- diff(range(conteos))
rango
## [1] 1012
# Variance
varianza <- var(conteos)
varianza
## [1] 72367.09
# Standard deviation
desv_est <- sd(conteos)
desv_est
## [1] 269.0113
# Coefficient of variation: standard deviation relative to the mean
coef_var <- desv_est / media
coef_var
## [1] 1.129848
# Skewness coefficient
coef_asim <- skewness(conteos)
coef_asim
## [1] 1.391047
# Excess kurtosis: kurtosis() returns the plain (Pearson) kurtosis, so 3 is
# subtracted to obtain the excess, as the `exckurt` helper used for Talla does.
exceso_curtosis <- kurtosis(Precio$Frecuencia_Absoluta) - 3
exceso_curtosis
## [1] 1.303465

REPLICAR LAS GRÁFICAS

library(ggplot2)
# Bar chart of the absolute frequency of each price class.
dfbars <- data.frame(
  Intervalo = c("(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]", "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]", "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]", "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"),
  Frecuencia_Absoluta = c(6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013, 495, 130, 75, 120, 319, 254, 83, 17, 6, 1)
)

# Order the class labels from lowest to highest by class midpoint. A label
# such as "(4,17]" is stripped of its brackets and split at the comma into its
# two limits, whose mean is the midpoint (10.5). Splitting at the comma is
# what keeps the two limits apart; deleting it would fuse them into "417".
limites <- strsplit(gsub("\\(|\\]", "", dfbars$Intervalo), ",", fixed = TRUE)
puntos_medios <- vapply(limites, function(x) mean(as.numeric(x)), numeric(1))
dfbars$Intervalo <- factor(dfbars$Intervalo, levels = dfbars$Intervalo[order(puntos_medios)])

# Draw the bar chart. Grid-line thickness is set with `linewidth`, which
# replaces the deprecated `size` argument of element_line().
ggplot(dfbars, aes(x = Intervalo, y = Frecuencia_Absoluta)) +
  geom_bar(stat = "identity", fill = "purple", alpha = 0.7) +
  labs(title = "Frecuencia absoluta por rango de precios",
       x = "",
       y = "Frecuencia Absoluta") +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "gray"),
    panel.grid.major = element_line(linewidth = 0.5, color = "white"),
    panel.grid.minor = element_line(linewidth = 0.15, color = "gray"))

#Grafico Histograma
library(ggplot2)


# Histogram-style chart: one full-width bar per price class, in class order.
etiquetas_his <- c(
  "(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]",
  "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]",
  "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]",
  "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"
)
dfhis <- data.frame(
  # Levels follow the order of the labels, so the bars keep that order too.
  Intervalo = factor(etiquetas_his, levels = etiquetas_his),
  Frecuencia_Absoluta = c(
    6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
    495, 130, 75, 120, 319, 254, 83, 17, 6, 1
  )
)

# width = 1 leaves no gap between neighbouring bars.
grafico_his <- ggplot(dfhis, aes(x = Intervalo, y = Frecuencia_Absoluta)) +
  geom_col(fill = alpha("purple", 0.5), width = 1) +
  labs(
    title = "Histograma de Frecuencia Absoluta",
    x = "Precio",
    y = "Frecuencia Absoluta"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
grafico_his

# Grouped-data estimates of the mean, variance and standard deviation of the
# price, using each class midpoint weighted by its absolute frequency.
# NOTE(review): the `*_real` values are typed in by hand; nothing in this
# section computes them -- confirm where they come from.
intervalos <- c(
  "(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]",
  "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]",
  "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]",
  "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"
)
# Class midpoints: 10.5, 23.5, ..., 270.5 (21 values, 13 apart).
valores_intermedios <- seq(from = 10.5, to = 270.5, by = 13)
frecuencias_absolutas <- c(
  6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
  495, 130, 75, 120, 319, 254, 83, 17, 6, 1
)
n_total <- sum(frecuencias_absolutas)

# Mean
promedio_estimado <- sum(valores_intermedios * frecuencias_absolutas) / n_total
promedio_real <- 127.5
promedio_estimado
## [1] 126.7876
promedio_real
## [1] 127.5
# Variance (sample form: divided by n - 1)
desvios_cuadrados <- (valores_intermedios - promedio_estimado)^2
varianza_estimada <- sum(desvios_cuadrados * frecuencias_absolutas) / (n_total - 1)
varianza_real <- 441.7
varianza_estimada
## [1] 2624.79
varianza_real
## [1] 441.7
# Standard deviation
desviacion_estandar_estimada <- sqrt(varianza_estimada)
desviacion_estandar_real <- 21.0
desviacion_estandar_estimada
## [1] 51.23271
desviacion_estandar_real
## [1] 21

#```{r, echo=FALSE} #library(ggplot2)

#dfr <- df %>% mutate(Intervalo_precio = cut(Precio en USD, c(4, 17, 30, 43, 56, 69, 82, 95, 108, 121, 134, 147, 160, 173, 186, 199, 212, 225, 238, 251, 264, 277), include.lowest = TRUE)) # Incluye include.lowest #head(dfr) #ggplot(dfr, aes(x = Prenda, y = Precio en USD, fill = Prenda)) + #geom_boxplot(alpha = 0.6, outlier.shape = 16, outlier.size = 2) + #theme_classic() + #labs(x = “Prenda”, y = “Precios”) + #scale_fill_manual(values = c(“#FC8D62”, “#66C2A5”, “#8DA0CB”)) +
#theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 12), # legend.position = “none”) #```