# Librerías
library(ggplot2)
library(dplyr)
library(psych)
# Fix the RNG seed so the random sample below is reproducible.
set.seed(563)
data("diamonds")
# Draw 120 rows at random from the diamonds data set and preview the
# first six of them.
muestra <- sample_n(diamonds, size = 120)
head(muestra, n = 6L)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.93 Ideal E SI1 61.9 55 4511 6.26 6.31 3.89
## 2 1 Ideal G VS2 62.3 56 5887 6.37 6.41 3.98
## 3 0.25 Very Good E VVS2 61.1 61 533 4.03 4.06 2.47
## 4 1.01 Very Good D SI1 59 59 5600 6.57 6.59 3.88
## 5 0.7 Ideal I SI1 61 57 2287 5.71 5.74 3.5
## 6 0.56 Ideal J VS2 62 56 1224 5.27 5.31 3.28
# Measures of central tendency (carat variable)
mean(muestra[["carat"]])
## [1] 0.83125
median(muestra[["carat"]])
## [1] 0.71
# Mode: the carat value with the highest frequency. table() orders its
# entries by value and which.max() returns the first maximum, so when
# several values are tied only the smallest one is reported.
mode_carat <- as.numeric(names(which.max(table(muestra[["carat"]]))))
mode_carat
## [1] 1.01
# Measures of variability (depth variable): sample variance, standard
# deviation, minimum/maximum, and interquartile range.
with(muestra, var(depth))
## [1] 1.27972
with(muestra, sd(depth))
## [1] 1.131247
with(muestra, range(depth))
## [1] 58.2 65.8
with(muestra, IQR(depth))
## [1] 1.3
# Quartiles and percentiles (table variable)
# First and third quartiles.
quantile(muestra[["table"]], probs = c(0.25, 0.75))
## 25% 75%
## 56.00 58.25
# 30th, 78th and 90th percentiles.
quantile(muestra[["table"]], probs = c(0.30, 0.78, 0.90))
## 30% 78% 90%
## 56 59 60
# Shape measures (x variable): keep only the skewness and kurtosis
# columns of the psych::describe() summary.
describe(muestra[["x"]])[c("skew", "kurtosis")]
## skew kurtosis
## X1 0.42 -0.63
# Frequency table and histogram (y variable)
# Count how many times each distinct y value occurs in the sample.
tabla_y <- table(muestra[["y"]])
tabla_y
##
## 3.97 4.02 4.04 4.06 4.24 4.25 4.26 4.29 4.31 4.32 4.34 4.35 4.36 4.4 4.42 4.44
## 2 1 1 1 1 1 1 2 2 1 1 1 1 1 1 2
## 4.47 4.49 4.6 4.63 4.7 4.72 4.73 4.74 4.76 4.79 4.82 4.83 4.84 4.87 4.89 4.94
## 1 1 1 1 1 2 3 3 1 1 1 1 1 1 1 1
## 5.09 5.11 5.15 5.18 5.25 5.31 5.34 5.38 5.43 5.49 5.53 5.71 5.72 5.74 5.75 5.76
## 1 1 1 2 1 1 1 1 1 1 1 2 2 1 1 1
## 5.78 5.79 5.8 5.83 5.85 5.99 6 6.05 6.06 6.11 6.12 6.21 6.25 6.29 6.31 6.32
## 1 2 1 1 1 1 1 1 1 1 1 1 2 2 1 1
## 6.34 6.36 6.38 6.41 6.44 6.46 6.47 6.5 6.59 6.61 6.66 6.68 6.71 6.83 6.87 6.88
## 1 1 1 1 1 4 2 2 1 1 1 2 1 1 1 1
## 6.96 7.18 7.2 7.24 7.28 7.31 7.36 7.41 7.45 7.48 7.63 8 8.1 8.12 8.16 8.31
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 8.37 8.43 8.67
## 1 1 1
# Histogram of y with the default binning; orange bars, dark red outlines.
hist(
  muestra[["y"]],
  col = "orange",
  border = "darkred",
  main = "Histograma de Y",
  xlab = "Valores de Y"
)

# Frequency table and charts (cut variable)
# Number of sampled diamonds in each cut category.
tabla_corte <- table(muestra[["cut"]])
tabla_corte
##
## Fair Good Very Good Premium Ideal
## 2 9 24 31 54
# Bar plot of the counts, one fill colour per cut category.
barplot(
  tabla_corte,
  main = "Distribución de Corte",
  border = "black",
  col = c("steelblue", "tomato", "seagreen", "gold", "orchid")
)

# Percentage share of each cut category, rounded to one decimal place.
prop <- round(prop.table(tabla_corte) * 100, 1)
# Donut chart of the cut distribution. Each slice is labelled with its
# category name and percentage share.
pie(tabla_corte,
    labels = paste0(names(tabla_corte), " (", prop, "%)"),
    col = c("red", "blue", "green", "purple", "yellow"),
    main = "Distribución de Corte (Dona)")
# pie() on its own draws a solid pie, which contradicts the "Dona" title.
# Overlay a white disc at the centre to cut the hole; pie() is centred at
# (0, 0) with a default radius of 0.8, so a radius of 0.4 leaves a ring.
symbols(0, 0, circles = 0.4, inches = FALSE, add = TRUE,
        bg = "white", fg = "white")
