# Librerías
library(ggplot2)
library(dplyr)
library(psych)

# Fix the RNG seed so the random sample drawn below is reproducible.
set.seed(563)
data("diamonds")

# Draw 120 rows at random from diamonds and preview the first six of them.
muestra <- sample_n(diamonds, size = 120)
head(muestra, n = 6)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.93 Ideal     E     SI1      61.9    55  4511  6.26  6.31  3.89
## 2  1    Ideal     G     VS2      62.3    56  5887  6.37  6.41  3.98
## 3  0.25 Very Good E     VVS2     61.1    61   533  4.03  4.06  2.47
## 4  1.01 Very Good D     SI1      59      59  5600  6.57  6.59  3.88
## 5  0.7  Ideal     I     SI1      61      57  2287  5.71  5.74  3.5 
## 6  0.56 Ideal     J     VS2      62      56  1224  5.27  5.31  3.28
# Measures of central tendency (carat variable)
mean(muestra[["carat"]])
## [1] 0.83125
median(muestra[["carat"]])
## [1] 0.71
# Mode: the value with the highest count in the frequency table. which.max()
# returns the first maximum, so ties resolve to the smallest tied value.
mode_carat <- as.numeric(names(which.max(table(muestra[["carat"]]))))
mode_carat
## [1] 1.01
# Measures of variability (depth variable): variance, standard deviation,
# minimum/maximum, and interquartile range.
with(muestra, var(depth))
## [1] 1.27972
with(muestra, sd(depth))
## [1] 1.131247
with(muestra, range(depth))
## [1] 58.2 65.8
with(muestra, IQR(depth))
## [1] 1.3
# Quartiles and percentiles (table variable)
# First and third quartiles.
quantile(muestra[["table"]], probs = c(0.25, 0.75))
##   25%   75% 
## 56.00 58.25
# 30th, 78th and 90th percentiles.
quantile(muestra[["table"]], probs = c(0.3, 0.78, 0.9))
## 30% 78% 90% 
##  56  59  60
# Shape measures (x variable): keep only the skewness and kurtosis columns
# of the summary returned by psych::describe().
psych::describe(muestra[["x"]])[c("skew", "kurtosis")]
##    skew kurtosis
## X1 0.42    -0.63
# Frequency table of the y variable: one count per distinct observed value.
tabla_y <- table(muestra[["y"]])
print(tabla_y)
## 
## 3.97 4.02 4.04 4.06 4.24 4.25 4.26 4.29 4.31 4.32 4.34 4.35 4.36  4.4 4.42 4.44 
##    2    1    1    1    1    1    1    2    2    1    1    1    1    1    1    2 
## 4.47 4.49  4.6 4.63  4.7 4.72 4.73 4.74 4.76 4.79 4.82 4.83 4.84 4.87 4.89 4.94 
##    1    1    1    1    1    2    3    3    1    1    1    1    1    1    1    1 
## 5.09 5.11 5.15 5.18 5.25 5.31 5.34 5.38 5.43 5.49 5.53 5.71 5.72 5.74 5.75 5.76 
##    1    1    1    2    1    1    1    1    1    1    1    2    2    1    1    1 
## 5.78 5.79  5.8 5.83 5.85 5.99    6 6.05 6.06 6.11 6.12 6.21 6.25 6.29 6.31 6.32 
##    1    2    1    1    1    1    1    1    1    1    1    1    2    2    1    1 
## 6.34 6.36 6.38 6.41 6.44 6.46 6.47  6.5 6.59 6.61 6.66 6.68 6.71 6.83 6.87 6.88 
##    1    1    1    1    1    4    2    2    1    1    1    2    1    1    1    1 
## 6.96 7.18  7.2 7.24 7.28 7.31 7.36 7.41 7.45 7.48 7.63    8  8.1 8.12 8.16 8.31 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 8.37 8.43 8.67 
##    1    1    1
# Histogram of the y variable, using hist()'s default binning.
hist(
  muestra[["y"]],
  col = "orange",
  border = "darkred",
  main = "Histograma de Y",
  xlab = "Valores de Y"
)

# Frequency table and bar chart (cut variable)
tabla_corte <- table(muestra[["cut"]])
print(tabla_corte)
## 
##      Fair      Good Very Good   Premium     Ideal 
##         2         9        24        31        54
# One bar per cut level, each drawn in its own fill colour.
barplot(
  tabla_corte,
  main = "Distribución de Corte",
  border = "black",
  col = c("steelblue", "tomato", "seagreen", "gold", "orchid")
)

# Percentage share of each cut level, rounded to one decimal for the labels.
prop <- round(100 * prop.table(tabla_corte), 1)

# Donut chart of the cut distribution; each slice is labelled
# "<level> (<percentage>%)".
pie(tabla_corte,
    labels = paste0(names(tabla_corte), " (", prop, "%)"),
    col = c("red", "blue", "green", "purple", "yellow"),
    main = "Distribución de Corte (Dona)")
# pie() on its own draws a solid pie, which contradicted the "Dona" title.
# Overlay a white disc at the origin (half of pie()'s default 0.8 radius) to
# open the hole in the middle. inches = FALSE makes the radius use plot units.
symbols(0, 0, circles = 0.4, inches = FALSE, add = TRUE, bg = "white")