Datos agrupados

Proceso:

Identificar datos de la muestra

# Raw sample values, in the order they were recorded
muestra <- c(
  18, 19, 20, 24, 26, 28, 18, 19, 23, 16, 21, 30,
  23, 27, 29, 15, 18, 26, 34, 45, 23, 46, 53, 23,
  38, 46, 34, 13, 46, 87, 46, 62, 56, 27, 22, 29,
  65, 54, 76, 86, 74, 34, 45, 54, 65, 76, 23, 45
)
print(muestra)
##  [1] 18 19 20 24 26 28 18 19 23 16 21 30 23 27 29 15 18 26 34 45 23 46 53
## [24] 23 38 46 34 13 46 87 46 62 56 27 22 29 65 54 76 86 74 34 45 54 65 76
## [47] 23 45

* Ordenar datos y mostrar

# Ascending copy of the sample; `muestra` itself is left untouched
muestraord <- sort(muestra, decreasing = FALSE)
print(muestraord)
##  [1] 13 15 16 18 18 18 19 19 20 21 22 23 23 23 23 23 24 26 26 27 27 28 29
## [24] 29 30 34 34 34 38 45 45 45 46 46 46 46 53 54 54 56 62 65 65 74 76 76
## [47] 86 87

Encontrar el número de elementos n, los valores máximo y mínimo, el rango y la amplitud del rango de la muestra

# Sample size
n <- length(muestra)
print(n)
## [1] 48
# Largest and smallest observations
print(max(muestra))
## [1] 87
print(min(muestra))
## [1] 13
# range() returns c(minimum, maximum)
rango <- range(muestra)
print(rango)
## [1] 13 87
# Width of the range: maximum minus minimum (same value as diff(rango))
amplitud <- rango[2] - rango[1]
print(amplitud)
## [1] 74

Agrupando datos de manera habitual

# Class width = amplitude of the range / number of intervals
nointervalos <- 5   # desired number of intervals
rangointervalos <- amplitud / nointervalos
rangointervalos
## [1] 14.8
# sprintf() builds the message from a single template, which avoids the
# doubled spaces that paste() (default sep = " ") adds around padded pieces
print(sprintf(
  "Los valores de cada grupo van de %s en %s a partir de: %s",
  rangointervalos, rangointervalos, min(muestra)
))
## [1] "Los valores de cada grupo van de 14.8 en 14.8 a partir de: 13"

Tabla de frecuencia de datos agrupados

  • Se empieza del valor menor para evitar errores de agrupamiento
# Frequency table of the sample split into `nointervalos` classes.
# The interval count comes from `nointervalos` (set to 5 above) instead of a
# repeated literal, so changing it in one place keeps the table consistent.
# as.data.frame() on a table yields the columns Var1 (interval) and Freq.
tabla.intervalos <- as.data.frame(table(cut(muestra, breaks = nointervalos)))
tabla.intervalos
##          Var1 Freq
## 1 (12.9,27.8]   21
## 2 (27.8,42.6]    8
## 3 (42.6,57.4]   11
## 4 (57.4,72.2]    3
## 5 (72.2,87.1]    5

Plot o visualizar la tabla de frecuencia

# Draw the grouped frequency table (interval versus count)
plot(
  x = tabla.intervalos,
  main = "¿De cuál intervalo hay más y menos elementos?"
)

Regla de Sturges.

# Sturges' rule by hand: k = 1 + 3.3222 * log10(n), then rounded UP
sturges_manual <- 1 + 3.3222 * log10(n)
sturges_manual
## [1] 6.58542
ceiling(sturges_manual) # rounding up gives 7, matching nclass.Sturges() below
## [1] 7
nointervalos <- nclass.Sturges(muestra) # number of classes by Sturges' rule (7)
nointervalos
## [1] 7
cut(muestra, breaks = nointervalos) # interval assigned to each observation
##  [1] (12.9,23.6] (12.9,23.6] (12.9,23.6] (23.6,34.1] (23.6,34.1]
##  [6] (23.6,34.1] (12.9,23.6] (12.9,23.6] (12.9,23.6] (12.9,23.6]
## [11] (12.9,23.6] (23.6,34.1] (12.9,23.6] (23.6,34.1] (23.6,34.1]
## [16] (12.9,23.6] (12.9,23.6] (23.6,34.1] (23.6,34.1] (44.7,55.3]
## [21] (12.9,23.6] (44.7,55.3] (44.7,55.3] (12.9,23.6] (34.1,44.7]
## [26] (44.7,55.3] (23.6,34.1] (12.9,23.6] (44.7,55.3] (76.4,87.1]
## [31] (44.7,55.3] (55.3,65.9] (55.3,65.9] (23.6,34.1] (12.9,23.6]
## [36] (23.6,34.1] (55.3,65.9] (44.7,55.3] (65.9,76.4] (76.4,87.1]
## [41] (65.9,76.4] (23.6,34.1] (44.7,55.3] (44.7,55.3] (55.3,65.9]
## [46] (65.9,76.4] (12.9,23.6] (44.7,55.3]
## 7 Levels: (12.9,23.6] (23.6,34.1] (34.1,44.7] (44.7,55.3] ... (76.4,87.1]
# Frequency table for the 7 Sturges classes (columns Var1 and Freq)
tabla.intervalos <- as.data.frame(table(cut(muestra, breaks = nointervalos)))
tabla.intervalos
##          Var1 Freq
## 1 (12.9,23.6]   16
## 2 (23.6,34.1]   12
## 3 (34.1,44.7]    1
## 4 (44.7,55.3]   10
## 5 (55.3,65.9]    4
## 6 (65.9,76.4]    3
## 7 (76.4,87.1]    2
# Pie chart with one slice per class, labelled "<interval> - <count>"
pie(
  tabla.intervalos$Freq,
  labels = paste(tabla.intervalos$Var1, " - ", tabla.intervalos$Freq),
  main = "¿De cuál intervalo hay más y menos elementos?. Sturges"
)