library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the student averages CSV straight from the course repository.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/probabilidad-y-estad-stica/master/datos/promedios%20alumnos/datos%20alumnos%20promedios%20SEP%202020.csv",
  encoding = "UTF-8"
)
# Preview the first rows of the data frame.
head(datos)
## No..Control Alumno Semestre Cr..Apr. Carga Promedio Carrera
## 1 20190001 1 11 198 19 80.21 SISTEMAS
## 2 20190002 2 11 235 10 84.33 SISTEMAS
## 3 20190003 3 9 235 10 95.25 SISTEMAS
## 4 20190004 4 9 226 19 95.00 SISTEMAS
## 5 20190005 5 10 231 14 82.32 SISTEMAS
## 6 20190006 6 9 212 23 95.02 SISTEMAS
# Preview the last rows of the data frame.
tail(datos)
## No..Control Alumno Semestre Cr..Apr. Carga Promedio Carrera
## 5924 20195924 5924 2 27 28 92.83 ADMINISTRACION
## 5925 20195925 5925 7 94 13 80.95 ADMINISTRACION
## 5926 20195926 5926 5 103 32 92.68 ADMINISTRACION
## 5927 20195927 5927 4 79 34 86.18 ADMINISTRACION
## 5928 20195928 5928 5 108 32 90.48 ADMINISTRACION
## 5929 20195929 5929 7 169 32 92.33 ADMINISTRACION
# Box plot of Promedio for every career, restricted to rows with Promedio > 0.
# The title used to name only "Administración" even though the x axis spans
# all careers, so it now describes the whole comparison.
ggplot(
  data = filter(datos, Promedio > 0),
  mapping = aes(x = Carrera, y = Promedio, color = Carrera)
) +
  geom_boxplot() +
  labs(title = "Promedios por Carrera")
# Subset: ADMINISTRACION rows with Promedio > 0.
administracion <- datos %>%
  filter(Promedio > 0, Carrera == "ADMINISTRACION")
# Echo the mean of Promedio for this career.
mean(administracion$Promedio)
## [1] 89.44312
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = administracion, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Administración",
    subtitle = paste(
      "Media = ", round(mean(administracion$Promedio), 2),
      ", Mediana = ", round(median(administracion$Promedio), 2)
    )
  )
# Subset: ARQUITECTURA rows with Promedio > 0.
arquitectura <- datos %>%
  filter(Promedio > 0, Carrera == "ARQUITECTURA")
# Echo the mean of Promedio for this career.
mean(arquitectura$Promedio)
## [1] 86.46481
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = arquitectura, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Arquitectura",
    subtitle = paste(
      "Media = ", round(mean(arquitectura$Promedio), 2),
      ", Mediana = ", round(median(arquitectura$Promedio), 2)
    )
  )
# Subset: BIOQUIMICA rows with Promedio > 0.
bioquimica <- datos %>%
  filter(Promedio > 0, Carrera == "BIOQUIMICA")
# Echo the mean of Promedio for this career.
mean(bioquimica$Promedio)
## [1] 84.68143
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = bioquimica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Bioquimica",
    subtitle = paste(
      "Media = ", round(mean(bioquimica$Promedio), 2),
      ", Mediana = ", round(median(bioquimica$Promedio), 2)
    )
  )
# Subset: CIVIL rows with Promedio > 0.
civil <- datos %>%
  filter(Promedio > 0, Carrera == "CIVIL")
# Echo the mean of Promedio for this career.
mean(civil$Promedio)
## [1] 84.281
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = civil, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Civil",
    subtitle = paste(
      "Media = ", round(mean(civil$Promedio), 2),
      ", Mediana = ", round(median(civil$Promedio), 2)
    )
  )
# Subset: ELECTRICA rows with Promedio > 0.
electrica <- datos %>%
  filter(Promedio > 0, Carrera == "ELECTRICA")
# Echo the mean of Promedio for this career.
mean(electrica$Promedio)
## [1] 83.77305
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = electrica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Electrica",
    subtitle = paste(
      "Media = ", round(mean(electrica$Promedio), 2),
      ", Mediana = ", round(median(electrica$Promedio), 2)
    )
  )
# Subset: ELECTRONICA rows with Promedio > 0.
electronica <- datos %>%
  filter(Promedio > 0, Carrera == "ELECTRONICA")
# Echo the mean of Promedio for this career.
mean(electronica$Promedio)
## [1] 86.6572
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = electronica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Electronica",
    subtitle = paste(
      "Media = ", round(mean(electronica$Promedio), 2),
      ", Mediana = ", round(median(electronica$Promedio), 2)
    )
  )
# Subset: GESTION EMPRESARIAL rows with Promedio > 0.
gestion <- datos %>%
  filter(Promedio > 0, Carrera == "GESTION EMPRESARIAL")
# Echo the mean of Promedio for this career.
mean(gestion$Promedio)
## [1] 87.66966
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = gestion, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Gestion Empresarial",
    subtitle = paste(
      "Media = ", round(mean(gestion$Promedio), 2),
      ", Mediana = ", round(median(gestion$Promedio), 2)
    )
  )
# Subset: INDUSTRIAL rows with Promedio > 0.
industrial <- datos %>%
  filter(Promedio > 0, Carrera == "INDUSTRIAL")
# Echo the mean of Promedio for this career.
mean(industrial$Promedio)
## [1] 85.01737
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = industrial, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Industrial",
    subtitle = paste(
      "Media = ", round(mean(industrial$Promedio), 2),
      ", Mediana = ", round(median(industrial$Promedio), 2)
    )
  )
# Subset: INFORMATICA rows with Promedio > 0.
informatica <- datos %>%
  filter(Promedio > 0, Carrera == "INFORMATICA")
# Echo the mean of Promedio for this career.
mean(informatica$Promedio)
## [1] 86.26577
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = informatica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Informatica",
    subtitle = paste(
      "Media = ", round(mean(informatica$Promedio), 2),
      ", Mediana = ", round(median(informatica$Promedio), 2)
    )
  )
# Subset: MECANICA rows with Promedio > 0.
mecanica <- datos %>%
  filter(Promedio > 0, Carrera == "MECANICA")
# Echo the mean of Promedio for this career.
mean(mecanica$Promedio)
## [1] 82.58467
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = mecanica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Mecanica",
    subtitle = paste(
      "Media = ", round(mean(mecanica$Promedio), 2),
      ", Mediana = ", round(median(mecanica$Promedio), 2)
    )
  )
# Subset: MECATRONICA rows with Promedio > 0.
mecatronica <- datos %>%
  filter(Promedio > 0, Carrera == "MECATRONICA")
# Echo the mean of Promedio for this career.
mean(mecatronica$Promedio)
## [1] 84.45948
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = mecatronica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Mecatronica",
    subtitle = paste(
      "Media = ", round(mean(mecatronica$Promedio), 2),
      ", Mediana = ", round(median(mecatronica$Promedio), 2)
    )
  )
# Subset: QUIMICA rows with Promedio > 0.
quimica <- datos %>%
  filter(Promedio > 0, Carrera == "QUIMICA")
# Echo the mean of Promedio for this career.
mean(quimica$Promedio)
## [1] 86.05215
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = quimica, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Quimica",
    subtitle = paste(
      "Media = ", round(mean(quimica$Promedio), 2),
      ", Mediana = ", round(median(quimica$Promedio), 2)
    )
  )
# Subset: SISTEMAS rows with Promedio > 0.
sistemas <- datos %>%
  filter(Promedio > 0, Carrera == "SISTEMAS")
# Echo the mean of Promedio for this career.
mean(sistemas$Promedio)
## [1] 85.90464
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = sistemas, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de Sistemas",
    subtitle = paste(
      "Media = ", round(mean(sistemas$Promedio), 2),
      ", Mediana = ", round(median(sistemas$Promedio), 2)
    )
  )
# Subset: TIC rows with Promedio > 0.
tic <- datos %>%
  filter(Promedio > 0, Carrera == "TIC")
# Echo the mean of Promedio for this career.
mean(tic$Promedio)
## [1] 84.31719
# Histogram of Promedio (30 bins) with vertical lines at the median and the
# mean; mapping color to a constant label inside aes() puts each line in the
# legend.
ggplot(data = tic, mapping = aes(x = Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = median(Promedio), color = "mediana"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = mean(Promedio), color = "media"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Promedio de TIC",
    subtitle = paste(
      "Media = ", round(mean(tic$Promedio), 2),
      ", Mediana = ", round(median(tic$Promedio), 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(administracion)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = administracion, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Administración",
    subtitle = paste(
      "Varianza = ", round(var(administracion$Promedio), 2),
      ", DesvStd = ", round(sd(administracion$Promedio), 2),
      ", C.V. = ",
      round(sd(administracion$Promedio) / mean(administracion$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(arquitectura)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = arquitectura, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Arquitectura",
    subtitle = paste(
      "Varianza = ", round(var(arquitectura$Promedio), 2),
      ", DesvStd = ", round(sd(arquitectura$Promedio), 2),
      ", C.V. = ",
      round(sd(arquitectura$Promedio) / mean(arquitectura$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(bioquimica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = bioquimica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Bioquimica",
    subtitle = paste(
      "Varianza = ", round(var(bioquimica$Promedio), 2),
      ", DesvStd = ", round(sd(bioquimica$Promedio), 2),
      ", C.V. = ",
      round(sd(bioquimica$Promedio) / mean(bioquimica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(civil)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = civil, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Civil",
    subtitle = paste(
      "Varianza = ", round(var(civil$Promedio), 2),
      ", DesvStd = ", round(sd(civil$Promedio), 2),
      ", C.V. = ",
      round(sd(civil$Promedio) / mean(civil$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(electrica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
# The variance is taken from the Promedio column: passing the whole data frame
# to var() raised a coercion warning and did not yield a single variance value.
ggplot(data = electrica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Electrica",
    subtitle = paste(
      "Varianza = ", round(var(electrica$Promedio), 2),
      ", DesvStd = ", round(sd(electrica$Promedio), 2),
      ", C.V. = ",
      round(sd(electrica$Promedio) / mean(electrica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(electronica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = electronica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Electronica",
    subtitle = paste(
      "Varianza = ", round(var(electronica$Promedio), 2),
      ", DesvStd = ", round(sd(electronica$Promedio), 2),
      ", C.V. = ",
      round(sd(electronica$Promedio) / mean(electronica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(gestion)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = gestion, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Gestion Empresarial",
    subtitle = paste(
      "Varianza = ", round(var(gestion$Promedio), 2),
      ", DesvStd = ", round(sd(gestion$Promedio), 2),
      ", C.V. = ",
      round(sd(gestion$Promedio) / mean(gestion$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(industrial)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = industrial, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Industrial",
    subtitle = paste(
      "Varianza = ", round(var(industrial$Promedio), 2),
      ", DesvStd = ", round(sd(industrial$Promedio), 2),
      ", C.V. = ",
      round(sd(industrial$Promedio) / mean(industrial$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(informatica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = informatica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Informatica",
    subtitle = paste(
      "Varianza = ", round(var(informatica$Promedio), 2),
      ", DesvStd = ", round(sd(informatica$Promedio), 2),
      ", C.V. = ",
      round(sd(informatica$Promedio) / mean(informatica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(mecanica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = mecanica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Mecanica",
    subtitle = paste(
      "Varianza = ", round(var(mecanica$Promedio), 2),
      ", DesvStd = ", round(sd(mecanica$Promedio), 2),
      ", C.V. = ",
      round(sd(mecanica$Promedio) / mean(mecanica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(mecatronica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = mecatronica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Mecatronica",
    subtitle = paste(
      "Varianza = ", round(var(mecatronica$Promedio), 2),
      ", DesvStd = ", round(sd(mecatronica$Promedio), 2),
      ", C.V. = ",
      round(sd(mecatronica$Promedio) / mean(mecatronica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(quimica)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = quimica, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Quimica",
    subtitle = paste(
      "Varianza = ", round(var(quimica$Promedio), 2),
      ", DesvStd = ", round(sd(quimica$Promedio), 2),
      ", C.V. = ",
      round(sd(quimica$Promedio) / mean(quimica$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(sistemas)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = sistemas, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de Sistemas",
    subtitle = paste(
      "Varianza = ", round(var(sistemas$Promedio), 2),
      ", DesvStd = ", round(sd(sistemas$Promedio), 2),
      ", C.V. = ",
      round(sd(sistemas$Promedio) / mean(sistemas$Promedio) * 100, 2)
    )
  )
# Row count of the subset; aes() below reads `n` to build the x positions.
n <- nrow(tic)
# Scatter of Promedio against row position. The subtitle reports the variance,
# the standard deviation and the coefficient of variation (sd / mean * 100).
ggplot(data = tic, mapping = aes(x = 1:n, y = Promedio)) +
  geom_point() +
  labs(
    title = "Dispersión de Promedio de TIC",
    subtitle = paste(
      "Varianza = ", round(var(tic$Promedio), 2),
      ", DesvStd = ", round(sd(tic$Promedio), 2),
      ", C.V. = ",
      round(sd(tic$Promedio) / mean(tic$Promedio) * 100, 2)
    )
  )
# Per-career summary of Promedio: row count, mean, median, variance, standard
# deviation and coefficient of variation (desvstd / media * 100).
# NOTE(review): this pipeline starts from the full `datos`, whereas the
# per-career subsets elsewhere in the script keep only Promedio > 0. That is
# presumably why the means in the printed table are lower than the per-career
# means echoed earlier. Confirm whether the filter was intended here too.
tabla <- datos %>%
  group_by(Carrera) %>%
  summarise(
    n = n(),
    media = mean(Promedio),
    mediana = median(Promedio),
    vari = var(Promedio),
    desvstd = sd(Promedio),
    cv = desvstd / media * 100
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Print the summary table.
tabla
## # A tibble: 14 x 7
## Carrera n media mediana vari desvstd cv
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ADMINISTRACION 497 74.5 88.4 1125. 33.5 45.0
## 2 ARQUITECTURA 675 70.1 85.4 1163. 34.1 48.7
## 3 BIOQUIMICA 441 68.6 82.8 1126. 33.6 48.9
## 4 CIVIL 648 73.1 83.1 834. 28.9 39.5
## 5 ELECTRICA 280 60.7 81.8 1414. 37.6 61.9
## 6 ELECTRONICA 161 67.3 85.3 1324. 36.4 54.1
## 7 GESTION EMPRESARIAL 585 74.2 86.7 1013. 31.8 42.9
## 8 INDUSTRIAL 707 74.2 83.7 819. 28.6 38.6
## 9 INFORMATICA 101 60.6 83.6 1581. 39.8 65.6
## 10 MECANICA 301 61.7 80.7 1302. 36.1 58.4
## 11 MECATRONICA 432 70.8 83.4 981. 31.3 44.3
## 12 QUIMICA 568 72.6 84.6 996. 31.6 43.5
## 13 SISTEMAS 452 70.9 84.1 1081. 32.9 46.4
## 14 TIC 81 66.6 81.7 1209. 34.8 52.2
La gráfica de caja de promedios muestra el número de casos que se alejan del valor del promedio. El histograma de promedio muestra la media y la mediana del promedio por carrera. La gráfica de dispersión de promedio muestra la varianza y la desviación estándar del promedio por carrera; la carrera con menor dispersión es Sistemas. Lo que se analizó en este caso son las medidas de dispersión de los promedios que se tienen por carrera en una universidad. Las medidas de dispersión son parámetros estadísticos que informan sobre la variabilidad de los datos, indican si la distribución de los datos es más o menos homogénea y nos dan una medida sobre la representatividad de los parámetros de centralización (como lo son la moda, la mediana o la media).