Clases de los promedios de los alumnos del ITD

Objetivo: Determinar las clases conforme a las reglas de Sturges, Scott y FD (Freedman-Diaconis). Graficar histograma y pastel

Proceso:

  • Importar datos de promedios de alumnos
  • Graficar histograma sin cortes (breaks)
  • Graficar histograma de alumnos con promedio mayor que cero. Sturges, Scott, FD
  • Determinar clases conforme a Sturges
  • Graficar pastel
  • Determinar clases conforme a Scott
  • Graficar pastel
  • Determinar clases conforme a FD
  • Graficar pastel
  • Interpretar salidas de datos

Los datos

# Load the enrolment records (one row per student) from the CSV export.
datos <- read.csv(
  "C:/Users/SATELLITE/Documents/SEMESTRE ene-jun 2018/Z PROBABILIDAD Y ESTADISTICAS/DATOS/ALUMNOS DE ISC INSCRITOS/alumnos inscritos ene-jun 2018.csv",
  header = TRUE,
  sep = ","
)

# Printing the whole data frame is left commented out: it has thousands of rows.
# datos

head(datos) # first rows only
##   no semetre promedio      carrera
## 1  1       7    82.27 ARQUITECTURA
## 2  2      11    82.59 ARQUITECTURA
## 3  3       8    87.47 ARQUITECTURA
## 4  4      12    83.18 ARQUITECTURA
## 5  5      11    83.72 ARQUITECTURA
## 6  6       4    90.11 ARQUITECTURA
# Total number of records, and how many of them have an average above zero.
n.todos <- nrow(datos)
n.prom.mayor.cero <- sum(datos$promedio > 0, na.rm = TRUE)

n.todos # total count
## [1] 5946
n.prom.mayor.cero # records with average above zero: this is the n of interest
## [1] 5363

El histograma de los datos. Con frecuencia y cortes Sturges

El histograma sin cortes sale igual que el histograma de Sturges, porque Sturges es la regla que hist() usa por defecto
El histograma de Scott sale igual que Freedman
# Histogram of the averages above zero, drawn once per binning rule.
# The first pass stands in for the original call that passed no `breaks`:
# "Sturges" is hist()'s default, so naming it explicitly yields the same plot.
invisible(lapply(
  c("Sturges", "Sturges", "Scott", "FD"),
  function(regla) {
    hist(
      datos$promedio[which(datos$promedio > 0)],
      breaks = regla,
      freq = TRUE,
      main = "Promedios mayores que cero de los alumnos inscritos del ITD",
      xlab = "Promedios",
      ylab = "Frecuencia"
    )
  }
))

Determinando las frecuencias por clases (cortes). Los intervalos de clases. Sturges

# Sturges' rule for the number of classes: 1 + 3.33 * log10(n),
# i.e. 1 + log2(n), rounded up.
clases <- nclass.Sturges(with(datos, promedio[which(promedio > 0)]))
clases
## [1] 14
# Cut the averages above zero into `clases` intervals and count each interval.
tabla.intervalos <- as.data.frame(
  table(cut(with(datos, promedio[which(promedio > 0)]), breaks = clases))
)

tabla.intervalos # a data.frame: Var1 is the interval, Freq its count
##           Var1 Freq
## 1    (70,72.1]    5
## 2  (72.1,74.2]    5
## 3  (74.2,76.3]   36
## 4  (76.3,78.3]  195
## 5  (78.3,80.4]  572
## 6  (80.4,82.5]  927
## 7  (82.5,84.6]  974
## 8  (84.6,86.7]  855
## 9  (86.7,88.8]  685
## 10 (88.8,90.8]  522
## 11 (90.8,92.9]  339
## 12   (92.9,95]  181
## 13   (95,97.1]   51
## 14 (97.1,99.2]   16
# Extend the frequency table with two derived columns:
#   Freq.Rel  - relative frequency (count / students with average above zero)
#   Freq.Porc - the same value expressed as a percentage
tabla.intervalos$Freq.Rel <- tabla.intervalos$Freq / n.prom.mayor.cero
tabla.intervalos$Freq.Porc <- tabla.intervalos$Freq.Rel * 100

tabla.intervalos
##           Var1 Freq    Freq.Rel  Freq.Porc
## 1    (70,72.1]    5 0.000932314  0.0932314
## 2  (72.1,74.2]    5 0.000932314  0.0932314
## 3  (74.2,76.3]   36 0.006712661  0.6712661
## 4  (76.3,78.3]  195 0.036360246  3.6360246
## 5  (78.3,80.4]  572 0.106656722 10.6656722
## 6  (80.4,82.5]  927 0.172851016 17.2851016
## 7  (82.5,84.6]  974 0.181614768 18.1614768
## 8  (84.6,86.7]  855 0.159425695 15.9425695
## 9  (86.7,88.8]  685 0.127727018 12.7727018
## 10 (88.8,90.8]  522 0.097333582  9.7333582
## 11 (90.8,92.9]  339 0.063210889  6.3210889
## 12   (92.9,95]  181 0.033749767  3.3749767
## 13   (95,97.1]   51 0.009509603  0.9509603
## 14 (97.1,99.2]   16 0.002983405  0.2983405

Haciendo un pastel de esta tabla de intervalos

Primero las etiquetas que son las clases y sus rangos
Luego las frecuencias en el pastel
# install.packages("plotrix")
library(plotrix) # provides pie3D()

# One label per class: "<count> Q de <interval>".
# paste0() joins the pieces exactly as written. paste() would insert its
# default " " separator on top of the spaces already inside the literals,
# which doubled every space in the labels and titles.
etiquetas <- paste0(tabla.intervalos$Freq, " Q de ", tabla.intervalos$Var1)

# n.prom.mayor.cero is the number of students with an average above zero.
pie3D(
  tabla.intervalos$Freq,
  labels = etiquetas,
  main = paste0("Frecuencia de clases. ", n.prom.mayor.cero, " Observaciones"),
  labelcex = 0.5
)

# Same chart in percentages, rounded to two decimals.
etiquetas <- paste0(
  round(tabla.intervalos$Freq.Porc, 2), " % de ", tabla.intervalos$Var1
)

pie3D(
  round(tabla.intervalos$Freq.Porc, 2),
  labels = etiquetas,
  main = paste0("Frecuencia de clases. % ", n.prom.mayor.cero, " Observaciones"),
  labelcex = 0.5
)

Determinando las frecuencias por clases (cortes). Los intervalos de clases. Scott

# Scott's rule gives a bin WIDTH, not a class count:
#   h = 3.5 * s * n^(-1/3), where s is the standard deviation.
# nclass.scott() turns it into a number of classes: ceiling(range / h).
clases <- nclass.scott(with(datos, promedio[which(promedio > 0)]))
clases
## [1] 34
# Cut the averages above zero into `clases` intervals and count each interval.
tabla.intervalos <- as.data.frame(
  table(cut(with(datos, promedio[which(promedio > 0)]), breaks = clases))
)

tabla.intervalos # a data.frame: Var1 is the interval, Freq its count
##           Var1 Freq
## 1    (70,70.9]    3
## 2  (70.9,71.7]    2
## 3  (71.7,72.6]    1
## 4  (72.6,73.4]    3
## 5  (73.4,74.3]    2
## 6  (74.3,75.1]   10
## 7    (75.1,76]   19
## 8    (76,76.9]   29
## 9  (76.9,77.7]   80
## 10 (77.7,78.6]  143
## 11 (78.6,79.4]  217
## 12 (79.4,80.3]  255
## 13 (80.3,81.2]  341
## 14   (81.2,82]  391
## 15   (82,82.9]  411
## 16 (82.9,83.7]  397
## 17 (83.7,84.6]  410
## 18 (84.6,85.4]  383
## 19 (85.4,86.3]  341
## 20 (86.3,87.2]  318
## 21   (87.2,88]  267
## 22   (88,88.9]  267
## 23 (88.9,89.7]  225
## 24 (89.7,90.6]  201
## 25 (90.6,91.4]  162
## 26 (91.4,92.3]  157
## 27 (92.3,93.2]  109
## 28   (93.2,94]   83
## 29   (94,94.9]   60
## 30 (94.9,95.7]   40
## 31 (95.7,96.6]   17
## 32 (96.6,97.5]    9
## 33 (97.5,98.3]    5
## 34 (98.3,99.2]    5
# Extend the frequency table with two derived columns:
#   Freq.Rel  - relative frequency (count / students with average above zero)
#   Freq.Porc - the same value expressed as a percentage
tabla.intervalos$Freq.Rel <- tabla.intervalos$Freq / n.prom.mayor.cero
tabla.intervalos$Freq.Porc <- tabla.intervalos$Freq.Rel * 100

tabla.intervalos
##           Var1 Freq     Freq.Rel  Freq.Porc
## 1    (70,70.9]    3 0.0005593884 0.05593884
## 2  (70.9,71.7]    2 0.0003729256 0.03729256
## 3  (71.7,72.6]    1 0.0001864628 0.01864628
## 4  (72.6,73.4]    3 0.0005593884 0.05593884
## 5  (73.4,74.3]    2 0.0003729256 0.03729256
## 6  (74.3,75.1]   10 0.0018646280 0.18646280
## 7    (75.1,76]   19 0.0035427932 0.35427932
## 8    (76,76.9]   29 0.0054074212 0.54074212
## 9  (76.9,77.7]   80 0.0149170241 1.49170241
## 10 (77.7,78.6]  143 0.0266641805 2.66641805
## 11 (78.6,79.4]  217 0.0404624277 4.04624277
## 12 (79.4,80.3]  255 0.0475480142 4.75480142
## 13 (80.3,81.2]  341 0.0635838150 6.35838150
## 14   (81.2,82]  391 0.0729069551 7.29069551
## 15   (82,82.9]  411 0.0766362111 7.66362111
## 16 (82.9,83.7]  397 0.0740257319 7.40257319
## 17 (83.7,84.6]  410 0.0764497483 7.64497483
## 18 (84.6,85.4]  383 0.0714152527 7.14152527
## 19 (85.4,86.3]  341 0.0635838150 6.35838150
## 20 (86.3,87.2]  318 0.0592951706 5.92951706
## 21   (87.2,88]  267 0.0497855678 4.97855678
## 22   (88,88.9]  267 0.0497855678 4.97855678
## 23 (88.9,89.7]  225 0.0419541302 4.19541302
## 24 (89.7,90.6]  201 0.0374790229 3.74790229
## 25 (90.6,91.4]  162 0.0302069737 3.02069737
## 26 (91.4,92.3]  157 0.0292746597 2.92746597
## 27 (92.3,93.2]  109 0.0203244453 2.03244453
## 28   (93.2,94]   83 0.0154764125 1.54764125
## 29   (94,94.9]   60 0.0111877680 1.11877680
## 30 (94.9,95.7]   40 0.0074585120 0.74585120
## 31 (95.7,96.6]   17 0.0031698676 0.31698676
## 32 (96.6,97.5]    9 0.0016781652 0.16781652
## 33 (97.5,98.3]    5 0.0009323140 0.09323140
## 34 (98.3,99.2]    5 0.0009323140 0.09323140

Haciendo un pastel de esta tabla de intervalos

Primero las etiquetas que son las clases y sus rangos
Luego las frecuencias en el pastel
# install.packages("plotrix")
library(plotrix) # provides pie3D()

# One label per class: "<count> Q de <interval>".
# paste0() joins the pieces exactly as written. paste() would insert its
# default " " separator on top of the spaces already inside the literals,
# which doubled every space in the labels and titles.
etiquetas <- paste0(tabla.intervalos$Freq, " Q de ", tabla.intervalos$Var1)

# n.prom.mayor.cero is the number of students with an average above zero.
pie3D(
  tabla.intervalos$Freq,
  labels = etiquetas,
  main = paste0("Frecuencia de clases. ", n.prom.mayor.cero, " Observaciones"),
  labelcex = 0.5
)

# Same chart in percentages, rounded to two decimals.
etiquetas <- paste0(
  round(tabla.intervalos$Freq.Porc, 2), " % de ", tabla.intervalos$Var1
)

pie3D(
  round(tabla.intervalos$Freq.Porc, 2),
  labels = etiquetas,
  main = paste0("Frecuencia de clases. % ", n.prom.mayor.cero, " Observaciones"),
  labelcex = 0.5
)

Determinando las frecuencias por clases (cortes). Los intervalos de clases. FD (Freedman-Diaconis)

# The Freedman-Diaconis rule gives a bin WIDTH, not a class count:
#   h = 2 * IQR * n^(-1/3), where IQR is the interquartile range.
# nclass.FD() turns it into a number of classes: ceiling(range / h).
clases <- nclass.FD(with(datos, promedio[which(promedio > 0)]))
clases
## [1] 41
# Cut the averages above zero into `clases` intervals and count each interval.
tabla.intervalos <- as.data.frame(
  table(cut(with(datos, promedio[which(promedio > 0)]), breaks = clases))
)

tabla.intervalos # a data.frame: Var1 is the interval, Freq its count
##           Var1 Freq
## 1    (70,70.7]    3
## 2  (70.7,71.4]    0
## 3  (71.4,72.1]    2
## 4  (72.1,72.8]    1
## 5  (72.8,73.6]    4
## 6  (73.6,74.3]    1
## 7    (74.3,75]    8
## 8    (75,75.7]   12
## 9  (75.7,76.4]   21
## 10 (76.4,77.1]   31
## 11 (77.1,77.8]   79
## 12 (77.8,78.5]  124
## 13 (78.5,79.2]  173
## 14   (79.2,80]  203
## 15   (80,80.7]  257
## 16 (80.7,81.4]  283
## 17 (81.4,82.1]  323
## 18 (82.1,82.8]  352
## 19 (82.8,83.5]  324
## 20 (83.5,84.2]  337
## 21 (84.2,84.9]  351
## 22 (84.9,85.7]  284
## 23 (85.7,86.4]  299
## 24 (86.4,87.1]  258
## 25 (87.1,87.8]  213
## 26 (87.8,88.5]  237
## 27 (88.5,89.2]  201
## 28 (89.2,89.9]  186
## 29 (89.9,90.6]  159
## 30 (90.6,91.3]  140
## 31 (91.3,92.1]  129
## 32 (92.1,92.8]   99
## 33 (92.8,93.5]   89
## 34 (93.5,94.2]   60
## 35 (94.2,94.9]   47
## 36 (94.9,95.6]   32
## 37 (95.6,96.3]   19
## 38   (96.3,97]    6
## 39   (97,97.7]    9
## 40 (97.7,98.5]    3
## 41 (98.5,99.2]    4
# Extend the frequency table with two derived columns:
#   Freq.Rel  - relative frequency (count / students with average above zero)
#   Freq.Porc - the same value expressed as a percentage
tabla.intervalos$Freq.Rel <- tabla.intervalos$Freq / n.prom.mayor.cero
tabla.intervalos$Freq.Porc <- tabla.intervalos$Freq.Rel * 100

tabla.intervalos
##           Var1 Freq     Freq.Rel  Freq.Porc
## 1    (70,70.7]    3 0.0005593884 0.05593884
## 2  (70.7,71.4]    0 0.0000000000 0.00000000
## 3  (71.4,72.1]    2 0.0003729256 0.03729256
## 4  (72.1,72.8]    1 0.0001864628 0.01864628
## 5  (72.8,73.6]    4 0.0007458512 0.07458512
## 6  (73.6,74.3]    1 0.0001864628 0.01864628
## 7    (74.3,75]    8 0.0014917024 0.14917024
## 8    (75,75.7]   12 0.0022375536 0.22375536
## 9  (75.7,76.4]   21 0.0039157188 0.39157188
## 10 (76.4,77.1]   31 0.0057803468 0.57803468
## 11 (77.1,77.8]   79 0.0147305613 1.47305613
## 12 (77.8,78.5]  124 0.0231213873 2.31213873
## 13 (78.5,79.2]  173 0.0322580645 3.22580645
## 14   (79.2,80]  203 0.0378519485 3.78519485
## 15   (80,80.7]  257 0.0479209398 4.79209398
## 16 (80.7,81.4]  283 0.0527689726 5.27689726
## 17 (81.4,82.1]  323 0.0602274846 6.02274846
## 18 (82.1,82.8]  352 0.0656349058 6.56349058
## 19 (82.8,83.5]  324 0.0604139474 6.04139474
## 20 (83.5,84.2]  337 0.0628379638 6.28379638
## 21 (84.2,84.9]  351 0.0654484430 6.54484430
## 22 (84.9,85.7]  284 0.0529554354 5.29554354
## 23 (85.7,86.4]  299 0.0557523774 5.57523774
## 24 (86.4,87.1]  258 0.0481074026 4.81074026
## 25 (87.1,87.8]  213 0.0397165765 3.97165765
## 26 (87.8,88.5]  237 0.0441916838 4.41916838
## 27 (88.5,89.2]  201 0.0374790229 3.74790229
## 28 (89.2,89.9]  186 0.0346820809 3.46820809
## 29 (89.9,90.6]  159 0.0296475853 2.96475853
## 30 (90.6,91.3]  140 0.0261047921 2.61047921
## 31 (91.3,92.1]  129 0.0240537013 2.40537013
## 32 (92.1,92.8]   99 0.0184598173 1.84598173
## 33 (92.8,93.5]   89 0.0165951893 1.65951893
## 34 (93.5,94.2]   60 0.0111877680 1.11877680
## 35 (94.2,94.9]   47 0.0087637516 0.87637516
## 36 (94.9,95.6]   32 0.0059668096 0.59668096
## 37 (95.6,96.3]   19 0.0035427932 0.35427932
## 38   (96.3,97]    6 0.0011187768 0.11187768
## 39   (97,97.7]    9 0.0016781652 0.16781652
## 40 (97.7,98.5]    3 0.0005593884 0.05593884
## 41 (98.5,99.2]    4 0.0007458512 0.07458512

Haciendo un pastel de esta tabla de intervalos

Primero las etiquetas que son las clases y sus rangos
Luego las frecuencias en el pastel

Aquí ya no hacemos el pastel porque salen 41 clases

# install.packages("plotrix")
library(plotrix) # provides pie3D()

# One label per class: "<count> Q de <interval>".
# paste0() joins the pieces exactly as written. paste() would insert its
# default " " separator on top of the spaces already inside the literals,
# which doubled every space in the labels.
etiquetas <- paste0(tabla.intervalos$Freq, " Q de ", tabla.intervalos$Var1)

# n.prom.mayor.cero is the number of students with an average above zero.
# The chart is left disabled: the FD rule yields 41 classes here.
# pie3D(tabla.intervalos$Freq, labels = etiquetas, main = paste0("Frecuencia de clases. ", n.prom.mayor.cero, " Observaciones"), labelcex = 0.3)

# Labels in percentages, rounded to two decimals.
etiquetas <- paste0(
  round(tabla.intervalos$Freq.Porc, 2), " % de ", tabla.intervalos$Var1
)

# Percentage chart, also left disabled for the same reason.
# pie3D(round(tabla.intervalos$Freq.Porc, 2), labels = etiquetas, main = paste0("Frecuencia de clases. % ", n.prom.mayor.cero, " Observaciones"), labelcex = 0.5)

Determinando las frecuencias por clases (cortes).

Haciendo arbitrariamente 10 cortes
# Arbitrary choice of ten classes instead of a rule-based count.
clases <- 10

# Cut the averages above zero into `clases` intervals and count each interval.
tabla.intervalos <- as.data.frame(
  table(cut(with(datos, promedio[which(promedio > 0)]), breaks = clases))
)

tabla.intervalos # a data.frame: Var1 is the interval, Freq its count
##           Var1 Freq
## 1    (70,72.9]    6
## 2  (72.9,75.8]   30
## 3  (75.8,78.8]  298
## 4  (78.8,81.7] 1003
## 5  (81.7,84.6] 1377
## 6  (84.6,87.5] 1168
## 7  (87.5,90.4]  792
## 8  (90.4,93.3]  496
## 9  (93.3,96.3]  170
## 10 (96.3,99.2]   23
# Extend the frequency table with two derived columns:
#   Freq.Rel  - relative frequency (count / students with average above zero)
#   Freq.Porc - the same value expressed as a percentage
tabla.intervalos$Freq.Rel <- tabla.intervalos$Freq / n.prom.mayor.cero
tabla.intervalos$Freq.Porc <- tabla.intervalos$Freq.Rel * 100

tabla.intervalos
##           Var1 Freq    Freq.Rel  Freq.Porc
## 1    (70,72.9]    6 0.001118777  0.1118777
## 2  (72.9,75.8]   30 0.005593884  0.5593884
## 3  (75.8,78.8]  298 0.055565915  5.5565915
## 4  (78.8,81.7] 1003 0.187022189 18.7022189
## 5  (81.7,84.6] 1377 0.256759277 25.6759277
## 6  (84.6,87.5] 1168 0.217788551 21.7788551
## 7  (87.5,90.4]  792 0.147678538 14.7678538
## 8  (90.4,93.3]  496 0.092485549  9.2485549
## 9  (93.3,96.3]  170 0.031698676  3.1698676
## 10 (96.3,99.2]   23 0.004288644  0.4288644

Haciendo un pastel de esta tabla de intervalos

Primero las etiquetas que son las clases y sus rangos
Luego las frecuencias en el pastel
10 CORTES
# install.packages("plotrix")
library(plotrix) # provides pie3D()

# One label per class: "<count> Q de <interval>".
# paste0() joins the pieces exactly as written. paste() would insert its
# default " " separator on top of the spaces already inside the literals,
# which doubled every space in the labels and titles.
etiquetas <- paste0(tabla.intervalos$Freq, " Q de ", tabla.intervalos$Var1)

# n.prom.mayor.cero is the number of students with an average above zero.
pie3D(
  tabla.intervalos$Freq,
  labels = etiquetas,
  main = paste0("Frecuencia de clases. ", n.prom.mayor.cero, " Observaciones"),
  labelcex = 0.5
)

# Same chart in percentages, rounded to two decimals.
etiquetas <- paste0(
  round(tabla.intervalos$Freq.Porc, 2), " % de ", tabla.intervalos$Var1
)

pie3D(
  round(tabla.intervalos$Freq.Porc, 2),
  labels = etiquetas,
  main = paste0("Frecuencia de clases. % ", n.prom.mayor.cero, " Observaciones"),
  labelcex = 0.5
)