Estadísticos de un conjunto de tallas de camiseta
# Restore the objects stored in "df.rds" into the workspace; the next line
# expects one of them to be called `df`.
load(file = "df.rds")
# Preview the first six rows.
head(df, n = 6)
## Número de prendas Prenda Talla Precio en USD Peso en kg
## 1 1 Camiseta M 142.14 2.029
## 2 3 Camisilla S 90.98 1.363
## 3 1 Camisilla S 43.57 1.432
## 4 2 Camiseta S 150.57 1.594
## 5 1 Camisilla M 42.53 1.790
## 6 3 Camisilla L 82.90 2.182
## Porcentaje de descuento Medio de pago Hora de pago
## 1 2 Tarjeta 15:15
## 2 23 Tarjeta 22:35
## 3 0 Tarjeta 14:06
## 4 7 Tarjeta 19:47
## 5 0 Efectivo 11:59
## 6 16 Tarjeta 11:01
# Classification of each variable: measurement scale, qualitative vs
# quantitative, and discrete vs continuous ("No aplica" for qualitative ones).
tabla <- data.frame(
  Variable = c(
    "Numero de prendas", "Prenda", "Talla", "Precio en USD", "Peso en kg",
    "Porcentaje de descuento", "Medio de pago", "Hora de Pago"
  ),
  # A count of garments has a true zero, so it is ratio-scaled rather than
  # ordinal; a clock time has an arbitrary zero, so it is interval-scaled.
  Escala_de_medicion = c(
    "Racional", "Nominal", "Ordinal", "Racional", "Racional", "Racional",
    "Nominal", "Intervalo"
  ),
  Cualitativa_Cuantitativa = c(
    "Cuantitativa", "Cualitativa", "Cualitativa", "Cuantitativa",
    "Cuantitativa", "Cuantitativa", "Cualitativa", "Cuantitativa"
  ),
  # Discrete/continuous is only defined for quantitative variables.
  Discreta_Continua_No_aplica = c(
    "Discreta", "No aplica", "No aplica", "Continua", "Continua", "Discreta",
    "No aplica", "Continua"
  )
)
tabla
## Variable Escala_de_medicion Cualitativa_Cuantitativa
## 1 Numero de prendas Racional Cuantitativa
## 2 Prenda Nominal Cualitativa
## 3 Talla Ordinal Cualitativa
## 4 Precio en USD Racional Cuantitativa
## 5 Peso en kg Racional Cuantitativa
## 6 Porcentaje de descuento Racional Cuantitativa
## 7 Medio de pago Nominal Cualitativa
## 8 Hora de Pago Intervalo Cuantitativa
## Discreta_Continua_No_aplica
## 1 Discreta
## 2 No aplica
## 3 No aplica
## 4 Continua
## 5 Continua
## 6 Discreta
## 7 No aplica
## 8 Continua
Análisis univariado de la variable Talla
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Univariate analysis of the Talla variable
# Absolute frequency of each size
tab <- table(df[["Talla"]])
tab
##
## L M S XL XS XXL XXS
## 636 1343 1545 184 1003 17 272
# Class of the object returned by table()
class(tab)
## [1] "table"
# The same tally as a data frame with columns x and freq
frecs <- plyr::count(df[["Talla"]])
frecs
## x freq
## 1 L 636
## 2 M 1343
## 3 S 1545
## 4 XL 184
## 5 XS 1003
## 6 XXL 17
## 7 XXS 272
# Relative frequency: each count as a share of the total
frecs[["relfreq"]] <- frecs$freq / sum(frecs$freq)
names(frecs) <- c("values", "absfreq", "relfreq")
frecs
## values absfreq relfreq
## 1 L 636 0.1272
## 2 M 1343 0.2686
## 3 S 1545 0.3090
## 4 XL 184 0.0368
## 5 XS 1003 0.2006
## 6 XXL 17 0.0034
## 7 XXS 272 0.0544
# Absolute (h) and relative (f) frequency of each size
fco <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h))
fco
## # A tibble: 7 × 3
## Talla h f
## <chr> <int> <dbl>
## 1 L 636 0.127
## 2 M 1343 0.269
## 3 S 1545 0.309
## 4 XL 184 0.0368
## 5 XS 1003 0.201
## 6 XXL 17 0.0034
## 7 XXS 272 0.0544
# Cumulative absolute (H) and relative (F) frequencies, accumulated in the
# table's current row order
fco <- fco %>%
  mutate(H = cumsum(h), F = cumsum(f))
fco
## # A tibble: 7 × 5
## Talla h f H F
## <chr> <int> <dbl> <int> <dbl>
## 1 L 636 0.127 636 0.127
## 2 M 1343 0.269 1979 0.396
## 3 S 1545 0.309 3524 0.705
## 4 XL 184 0.0368 3708 0.742
## 5 XS 1003 0.201 4711 0.942
## 6 XXL 17 0.0034 4728 0.946
## 7 XXS 272 0.0544 5000 1
# Ordering: start again from a fresh absolute/relative frequency table
fco <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h))
fco
## # A tibble: 7 × 3
## Talla h f
## <chr> <int> <dbl>
## 1 L 636 0.127
## 2 M 1343 0.269
## 3 S 1545 0.309
## 4 XL 184 0.0368
## 5 XS 1003 0.201
## 6 XXL 17 0.0034
## 7 XXS 272 0.0544
# Put the rows in size order, smallest to largest, instead of alphabetical
fco <- fco[
  match(c("XXS", "XS", "S", "M", "L", "XL", "XXL"), fco$Talla),
]
fco
## # A tibble: 7 × 3
## Talla h f
## <chr> <int> <dbl>
## 1 XXS 272 0.0544
## 2 XS 1003 0.201
## 3 S 1545 0.309
## 4 M 1343 0.269
## 5 L 636 0.127
## 6 XL 184 0.0368
## 7 XXL 17 0.0034
# Cumulative frequencies (H absolute, F relative) over the size-ordered rows
fco <- fco %>%
  mutate(H = cumsum(h), F = cumsum(f))
fco
## # A tibble: 7 × 5
## Talla h f H F
## <chr> <int> <dbl> <int> <dbl>
## 1 XXS 272 0.0544 272 0.0544
## 2 XS 1003 0.201 1275 0.255
## 3 S 1545 0.309 2820 0.564
## 4 M 1343 0.269 4163 0.833
## 5 L 636 0.127 4799 0.960
## 6 XL 184 0.0368 4983 0.997
## 7 XXL 17 0.0034 5000 1
library(tidyr)
library(dplyr)
library(data.table)
##
## Adjuntando el paquete: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(moments)
library(datasets)
library(readxl)
library(ggplot2)
library(stabledist)
library(scales)
fTalla <- fco
# Mean of the per-size counts stored in column h
mean(fTalla[["h"]])
## [1] 714.2857
# Mode: the size whose relative frequency is largest
with(frecs, values[which.max(relfreq)])
## [1] "S"
# Same result through DescTools, kept in `moda`
moda <- DescTools::Mode(df[["Talla"]])
moda
## [1] "S"
## attr(,"freq")
## [1] 1545
# Median: the first size, in XXS-to-XXL order, whose cumulative relative
# frequency F reaches 0.5
fco
## # A tibble: 7 × 5
## Talla h f H F
## <chr> <int> <dbl> <int> <dbl>
## 1 XXS 272 0.0544 272 0.0544
## 2 XS 1003 0.201 1275 0.255
## 3 S 1545 0.309 2820 0.564
## 4 M 1343 0.269 4163 0.833
## 5 L 636 0.127 4799 0.960
## 6 XL 184 0.0368 4983 0.997
## 7 XXL 17 0.0034 5000 1
# The median is the 50th percentile (the previous threshold was 0.69).
# A name other than `F` is used so the built-in alias for FALSE is not masked.
p_mediana <- 0.5
fco$Talla[fco$F >= p_mediana][1]
## [1] "S"
# Frequency table rebuilt from df, in the row order produced by group_by(),
# with cumulative columns H and F added
fTalla <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h)) %>%
  mutate(H = cumsum(h), F = cumsum(f))
fTalla
## # A tibble: 7 × 5
## Talla h f H F
## <chr> <int> <dbl> <int> <dbl>
## 1 L 636 0.127 636 0.127
## 2 M 1343 0.269 1979 0.396
## 3 S 1545 0.309 3524 0.705
## 4 XL 184 0.0368 3708 0.742
## 5 XS 1003 0.201 4711 0.942
## 6 XXL 17 0.0034 4728 0.946
## 7 XXS 272 0.0544 5000 1
# Quantiles of an ordinal variable are read from cumulative frequencies taken
# in size order. fTalla is sorted alphabetically (L, M, S, XL, ...), so its F
# column does not accumulate by size; the size-ordered table fco is used.
# A name other than `F` holds the threshold so FALSE's alias is not masked.
# First quartile
p <- 0.25
fco$Talla[fco$F >= p][1]
## [1] "XS"
# Third quartile
p <- 0.75
fco$Talla[fco$F >= p][1]
## [1] "M"
# 85th percentile
p <- 0.85
fco$Talla[fco$F >= p][1]
## [1] "L"
# Range: largest per-size count minus the smallest
diff(range(fTalla$h))
## [1] 1528
# Absolute and relative frequency table stored as frecm
frecm <- df %>%
  group_by(Talla) %>%
  summarise(h = n()) %>%
  mutate(f = h / sum(h))
frecm
## # A tibble: 7 × 3
## Talla h f
## <chr> <int> <dbl>
## 1 L 636 0.127
## 2 M 1343 0.269
## 3 S 1545 0.309
## 4 XL 184 0.0368
## 5 XS 1003 0.201
## 6 XXL 17 0.0034
## 7 XXS 272 0.0544
# Sample variance of the per-size counts
var(fTalla[["h"]])
## [1] 356313.2
# Sample standard deviation of the per-size counts
sd(fTalla[["h"]])
## [1] 596.9198
# Skewness coefficient of every column from h through F
fTalla %>%
  summarise(across(h:F, skewness))
## # A tibble: 1 × 4
## h f H F
## <dbl> <dbl> <dbl> <dbl>
## 1 0.229 0.229 -0.783 -0.783
# Excess kurtosis: the value returned by kurtosis() minus 3
exckurt <- function(x) kurtosis(x) - 3
# Applied to every column from h through F
fTalla %>%
  summarise(across(h:F, exckurt))
## # A tibble: 1 × 4
## h f H F
## <dbl> <dbl> <dbl> <dbl>
## 1 -1.48 -1.48 -0.757 -0.757
library(ggplot2)
library(dplyr)
# NOTE(review): this reassigns `df`, so from here on it holds this seven-row
# summary instead of whatever load("df.rds") provided — confirm that nothing
# later needs the original columns.
df <- data.frame(
  Talla = c("XXS", "XS", "S", "M", "L", "XL", "XXL"),
  Frecuencia = c(272, 1003, 1545, 1343, 636, 184, 17)
)
# Share of each size, in percent
df$Porcentaje <- (df$Frecuencia / sum(df$Frecuencia)) * 100
# Fix the level order so sizes sort XXS..XXL rather than alphabetically
df$Talla <- factor(
  df$Talla,
  levels = c("XXS", "XS", "S", "M", "L", "XL", "XXL")
)
Análisis univariado de la variable Precio (datos agrupados en intervalos)
# Frequency table of the price intervals: 21 classes of width 13 from 4 to 277.
# Only the absolute counts are typed in; the interval labels and the relative
# and cumulative columns are derived from them so the five columns cannot
# drift out of sync. local() keeps the helper vectors out of the workspace.
Precio <- local({
  cortes <- seq(4, 277, by = 13)
  frec_abs <- c(
    6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
    495, 130, 75, 120, 319, 254, 83, 17, 6, 1
  )
  total <- sum(frec_abs)
  data.frame(
    # Labels in the "(lower,upper]" form
    Intervalo = paste0("(", head(cortes, -1), ",", cortes[-1], "]"),
    Frecuencia_Absoluta = frec_abs,
    Frecuencia_Relativa = frec_abs / total,
    Frec_Abs_Ac = cumsum(frec_abs),
    # Divide the cumulative counts instead of accumulating the shares, which
    # avoids piling up floating-point error
    Frec_Rel_Ac = cumsum(frec_abs) / total
  )
})
print(Precio)
## Intervalo Frecuencia_Absoluta Frecuencia_Relativa Frec_Abs_Ac Frec_Rel_Ac
## 1 (4,17] 6 0.0012 6 0.0012
## 2 (17,30] 9 0.0018 15 0.0030
## 3 (30,43] 51 0.0102 66 0.0132
## 4 (43,56] 193 0.0386 259 0.0518
## 5 (56,69] 586 0.1172 845 0.1690
## 6 (69,82] 635 0.1270 1480 0.2960
## 7 (82,95] 241 0.0482 1721 0.3442
## 8 (95,108] 76 0.0152 1797 0.3594
## 9 (108,121] 161 0.0322 1958 0.3916
## 10 (121,134] 529 0.1058 2487 0.4974
## 11 (134,147] 1013 0.2026 3500 0.7000
## 12 (147,160] 495 0.0990 3995 0.7990
## 13 (160,173] 130 0.0260 4125 0.8250
## 14 (173,186] 75 0.0150 4200 0.8400
## 15 (186,199] 120 0.0240 4320 0.8640
## 16 (199,212] 319 0.0638 4639 0.9278
## 17 (212,225] 254 0.0508 4893 0.9786
## 18 (225,238] 83 0.0166 4976 0.9952
## 19 (238,251] 17 0.0034 4993 0.9986
## 20 (251,264] 6 0.0012 4999 0.9998
## 21 (264,277] 1 0.0002 5000 1.0000
# All statistics below are computed on Precio$Frecuencia_Absoluta, i.e. on
# the counts of the intervals.
# Mean
media <- mean(Precio[["Frecuencia_Absoluta"]])
media
## [1] 238.0952
# Median
mediana <- median(Precio[["Frecuencia_Absoluta"]])
mediana
## [1] 130
# First quartile
q1 <- quantile(Precio[["Frecuencia_Absoluta"]], probs = 0.25)
q1
## 25%
## 51
# Third quartile
q3 <- quantile(Precio[["Frecuencia_Absoluta"]], probs = 0.75)
q3
## 75%
## 319
# 85th percentile
p85 <- quantile(Precio[["Frecuencia_Absoluta"]], probs = 0.85)
p85
## 85%
## 529
# Range: maximum minus minimum
rango <- diff(range(Precio[["Frecuencia_Absoluta"]]))
rango
## [1] 1012
# Sample variance
varianza <- var(Precio[["Frecuencia_Absoluta"]])
varianza
## [1] 72367.09
# Sample standard deviation
desv_est <- sd(Precio[["Frecuencia_Absoluta"]])
desv_est
## [1] 269.0113
# Coefficient of variation: standard deviation relative to the mean
coef_var <- desv_est / media
coef_var
## [1] 1.129848
# Skewness coefficient of the interval counts
coef_asim <- skewness(Precio$Frecuencia_Absoluta)
coef_asim
## [1] 1.391047
# Excess kurtosis: kurtosis() gives the raw kurtosis, so 3 is subtracted,
# matching the exckurt() helper defined for the Talla analysis
exceso_curtosis <- kurtosis(Precio$Frecuencia_Absoluta) - 3
exceso_curtosis
## [1] 1.303465
REPLICAR LAS GRÁFICAS
# Bar chart of the absolute frequency of each price interval
library(ggplot2)
dfbars <- data.frame(
  Intervalo = c(
    "(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]",
    "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]",
    "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]",
    "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"
  ),
  Frecuencia_Absoluta = c(
    6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
    495, 130, 75, 120, 319, 254, 83, 17, 6, 1
  )
)
# Sort the intervals from lowest to highest by their midpoint.
# Strip "(" and "]", then split on the comma so each label yields its two
# numeric bounds. (Deleting the comma instead would fuse "(4,17]" into the
# single number 417.)
intervalos_ordenados <- gsub("\\(|\\]", "", dfbars$Intervalo)
intervalos_ordenados <- strsplit(intervalos_ordenados, ",", fixed = TRUE)
# vapply() guarantees one numeric midpoint per interval
intervalos_ordenados <- vapply(
  intervalos_ordenados,
  function(x) mean(as.numeric(x)),
  numeric(1)
)
dfbars$Intervalo <- factor(
  dfbars$Intervalo,
  levels = dfbars$Intervalo[order(intervalos_ordenados)]
)
# Draw the bar chart
ggplot(dfbars, aes(x = Intervalo, y = Frecuencia_Absoluta)) +
  geom_bar(stat = "identity", fill = "purple", alpha = 0.7) +
  labs(
    title = "Frecuencia absoluta por rango de precios",
    x = "",
    y = "Frecuencia Absoluta"
  ) +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "gray"),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0
    panel.grid.major = element_line(linewidth = 0.5, color = "white"),
    panel.grid.minor = element_line(linewidth = 0.15, color = "gray")
  )
# Histogram-style chart: one full-width bar per price interval
library(ggplot2)
dfhis <- data.frame(
  Intervalo = c(
    "(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]",
    "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]",
    "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]",
    "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"
  ),
  Frecuencia_Absoluta = c(
    6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
    495, 130, 75, 120, 319, 254, 83, 17, 6, 1
  )
)
# Keep the intervals in the order they are listed above
dfhis$Intervalo <- factor(dfhis$Intervalo, levels = dfhis$Intervalo)
ggplot(dfhis, aes(x = Intervalo, y = Frecuencia_Absoluta)) +
  # geom_col() draws bar heights straight from the y values; width = 1 makes
  # neighbouring bars touch
  geom_col(fill = alpha("purple", 0.5), width = 1) +
  labs(
    title = "Histograma de Frecuencia Absoluta",
    x = "Precio",
    y = "Frecuencia Absoluta"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Grouped-data estimates: each interval is represented by its midpoint and
# weighted by its absolute frequency.
intervalos <- c(
  "(4,17]", "(17,30]", "(30,43]", "(43,56]", "(56,69]", "(69,82]",
  "(82,95]", "(95,108]", "(108,121]", "(121,134]", "(134,147]",
  "(147,160]", "(160,173]", "(173,186]", "(186,199]", "(199,212]",
  "(212,225]", "(225,238]", "(238,251]", "(251,264]", "(264,277]"
)
# Midpoints 10.5, 23.5, ..., 270.5 (one every 13 units)
valores_intermedios <- seq(10.5, 270.5, by = 13)
frecuencias_absolutas <- c(
  6, 9, 51, 193, 586, 635, 241, 76, 161, 529, 1013,
  495, 130, 75, 120, 319, 254, 83, 17, 6, 1
)
# NOTE(review): the *_real values below are typed in by hand; nothing visible
# here shows where they come from — confirm them against the raw data.
# Mean
promedio_estimado <- sum(valores_intermedios * frecuencias_absolutas) /
  sum(frecuencias_absolutas)
promedio_real <- 127.5
promedio_estimado
## [1] 126.7876
promedio_real
## [1] 127.5
# Variance (n - 1 in the denominator)
varianza_estimada <- sum(
  frecuencias_absolutas * (valores_intermedios - promedio_estimado)^2
) / (sum(frecuencias_absolutas) - 1)
varianza_real <- 441.7
varianza_estimada
## [1] 2624.79
varianza_real
## [1] 441.7
# Standard deviation
desviacion_estandar_estimada <- sqrt(varianza_estimada)
desviacion_estandar_real <- 21.0
desviacion_estandar_estimada
## [1] 51.23271
desviacion_estandar_real
## [1] 21
#```{r, echo=FALSE}
#library(ggplot2)
#dfr <- df %>% mutate(Intervalo_precio =
#  cut(`Precio en USD`, c(4, 17, 30, 43, 56, 69, 82, 95, 108,
#  121, 134, 147, 160, 173, 186, 199, 212, 225, 238, 251, 264, 277),
#  include.lowest = TRUE)) # Incluye include.lowest
#head(dfr)
#ggplot(dfr, aes(x = Prenda, y = `Precio en USD`, fill = Prenda)) +
#geom_boxplot(alpha = 0.6, outlier.shape = 16, outlier.size = 2) +
#theme_classic() +
#labs(x = "Prenda", y = "Precios") +
#scale_fill_manual(values = c("#FC8D62", "#66C2A5", "#8DA0CB")) +
#theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 12),
#legend.position = "none")
#```