# Draw a reproducible random sample of 120 rows from the diamonds data set.
set.seed(356)
# Name the providing package explicitly so the data set is found even when
# ggplot2 has not been attached with library().
data("diamonds", package = "ggplot2")
# Namespace-qualified call: works without dplyr (or the magrittr pipe)
# being attached, and draws the same rows as `diamonds %>% sample_n(120)`.
muestra <- dplyr::sample_n(diamonds, 120)
# Preview the first rows of the sample.
head(muestra)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.74 Ideal D SI1 61.8 56 3517 5.84 5.88 3.62
## 2 0.36 Very Good D SI1 63.1 55 851 4.59 4.54 2.88
## 3 0.59 Very Good E VS2 60.6 60 2094 5.44 5.49 3.31
## 4 0.77 Premium I VS1 62.6 55 2524 5.9 5.85 3.68
## 5 0.23 Very Good F VVS2 61.2 58 530 3.96 3.98 2.43
## 6 0.51 Very Good G VS2 62.9 57 1438 5.06 5.12 3.2
# Central tendency of carat: mean, median and mode.
mean(muestra[["carat"]])
## [1] 0.8471667
median(muestra[["carat"]])
## [1] 0.77
# Mode: the value with the highest count in the frequency table.
# which.max() keeps the name of the first maximum, so when several values
# tie only the smallest one is reported.
mode_carat <- as.numeric(names(which.max(table(muestra[["carat"]]))))
mode_carat
## [1] 0.71
# Dispersion of depth: sample variance and standard deviation ...
var(muestra[["depth"]])
## [1] 1.959639
sd(muestra[["depth"]])
## [1] 1.399871
# ... minimum and maximum ...
range(muestra[["depth"]])
## [1] 57.8 65.1
# ... and interquartile range (Q3 - Q1).
IQR(muestra[["depth"]])
## [1] 1.75
# Position measures of the table variable.
# First and third quartiles (Q1 and Q3).
quantile(muestra[["table"]], probs = c(0.25, 0.75))
## 25% 75%
## 56 59
# 30th, 78th and 90th percentiles.
quantile(muestra[["table"]], probs = c(0.30, 0.78, 0.90))
## 30% 78% 90%
## 57 59 60
# Shape of the x variable: keep only the skewness and kurtosis columns
# from the summary produced by psych::describe().
library(psych)
describe(muestra[["x"]])[c("skew", "kurtosis")]
## skew kurtosis
## X1 0.14 -0.77
# Frequency table of y: one cell per distinct observed value.
tabla_y <- table(muestra[["y"]])
tabla_y
##
## 3.78 3.98 4.17 4.2 4.28 4.32 4.33 4.35 4.4 4.42 4.43 4.46 4.47 4.49 4.54 4.55
## 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1
## 4.57 4.58 4.67 4.7 4.72 4.73 4.77 4.78 4.81 4.93 5.05 5.06 5.1 5.12 5.15 5.18
## 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
## 5.19 5.25 5.27 5.29 5.31 5.4 5.46 5.49 5.57 5.67 5.68 5.7 5.74 5.75 5.77 5.8
## 1 1 2 2 1 1 1 1 1 1 2 1 2 2 1 1
## 5.84 5.85 5.88 5.9 5.93 5.97 6 6.07 6.15 6.19 6.23 6.26 6.27 6.3 6.32 6.33
## 2 2 1 1 1 1 1 1 1 1 3 1 1 1 1 1
## 6.36 6.37 6.38 6.41 6.44 6.45 6.46 6.47 6.52 6.54 6.55 6.57 6.58 6.59 6.61 6.63
## 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1
## 6.69 6.78 6.8 6.83 6.84 6.9 6.91 7.27 7.28 7.33 7.35 7.4 7.45 7.58 7.6 7.61
## 1 1 1 1 1 1 1 3 2 1 1 1 2 2 1 1
## 8.01 8.2 8.26 8.48
## 1 1 1 1
# Histogram of y; hist() picks the bins, giving a more readable view of
# the distribution than the per-value table above.
hist(
  muestra[["y"]],
  main = "Histograma de Y",
  xlab = "y",
  col = "skyblue",
  border = "white"
)

# Counts per cut category; cut is an ordered factor, so the table keeps
# the level order (Fair ... Ideal).
tabla_corte <- table(muestra[["cut"]])
tabla_corte
##
## Fair Good Very Good Premium Ideal
## 2 12 24 30 52
# Bar chart of the counts above.
barplot(
  tabla_corte,
  main = "Distribución de Corte",
  col = "lightgreen"
)

# Share of each cut category, as a percentage rounded to one decimal.
prop <- round(prop.table(tabla_corte) * 100, 1)
# Pie of the cut counts, each slice labelled "<category> (<pct>%)".
pie(
  tabla_corte,
  labels = paste0(names(tabla_corte), " (", prop, "%)"),
  col = rainbow(length(tabla_corte)),
  main = "Distribución de Corte (Dona)"
)
# The title announces a donut chart, but pie() alone draws a solid disc.
# pie() centres the disc at (0, 0) with a default radius of 0.8, so a white
# circle of radius 0.4 (in plot units) punches the hole in the middle.
symbols(
  0, 0,
  circles = 0.4,
  inches = FALSE,
  add = TRUE,
  bg = "white",
  fg = "white"
)
