library(readr)
library(dplyr) # install.packages("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the student averages CSV from the remote repository into `datos`;
# character data is marked as UTF-8.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/probabilidad-y-estad-stica/master/datos/promedios%20alumnos/datos%20alumnos%20promedios%20SEP%202020.csv",
  encoding = "UTF-8"
)

# Show the first six records.
head(datos, n = 6L)
## No..Control Alumno Semestre Cr..Apr. Carga Promedio Carrera
## 1 20190001 1 11 198 19 80.21 SISTEMAS
## 2 20190002 2 11 235 10 84.33 SISTEMAS
## 3 20190003 3 9 235 10 95.25 SISTEMAS
## 4 20190004 4 9 226 19 95.00 SISTEMAS
## 5 20190005 5 10 231 14 82.32 SISTEMAS
## 6 20190006 6 9 212 23 95.02 SISTEMAS

# Show the last six records.
tail(datos, n = 6L)
## No..Control Alumno Semestre Cr..Apr. Carga Promedio Carrera
## 5924 20195924 5924 2 27 28 92.83 ADMINISTRACION
## 5925 20195925 5925 7 94 13 80.95 ADMINISTRACION
## 5926 20195926 5926 5 103 32 92.68 ADMINISTRACION
## 5927 20195927 5927 4 79 34 86.18 ADMINISTRACION
## 5928 20195928 5928 5 108 32 90.48 ADMINISTRACION
## 5929 20195929 5929 7 169 32 92.33 ADMINISTRACION
# Box plot of Promedio per career, one colour per career.
# Only rows with Promedio > 0 are kept.
datos %>%
  filter(Promedio > 0) %>%
  ggplot(aes(x = Carrera, y = Promedio, color = Carrera)) +
  geom_boxplot() +
  labs(title = "Promedios")
#' Histogram of `Promedio` for one career, with mean and median marked.
#'
#' @param datos_carrera Data frame with a numeric `Promedio` column.
#' @param nombre Career name appended to the plot title.
#' @return A ggplot object; it is drawn when printed (which happens
#'   automatically when the call is evaluated at top level).
graficar_histograma <- function(datos_carrera, nombre) {
  promedios <- datos_carrera$Promedio
  ggplot(datos_carrera, aes(Promedio)) +
    geom_histogram(bins = 30) +
    # Mapping a constant string to `color` gives each line its own legend
    # entry ("mediana" / "media").
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept for the ggplot2 version this was written
    # against -- confirm before upgrading.
    geom_vline(aes(xintercept = median(Promedio), color = "mediana"),
               linetype = "solid", size = 2) +
    geom_vline(aes(xintercept = mean(Promedio), color = "media"),
               linetype = "solid", size = 2) +
    labs(
      title = paste("Histograma de Promedio de", nombre),
      subtitle = paste("Media = ", round(mean(promedios), 2),
                       ", Mediana = ", round(median(promedios), 2))
    )
}

# For every career: keep the rows with Promedio > 0, print the mean and the
# median, then draw the histogram. The per-career data frames stay in the
# global environment because later sections reuse them.

# Administracion ----
administracion <- filter(datos, Promedio > 0 & Carrera == "ADMINISTRACION")
mean(administracion$Promedio)
## [1] 89.44312
median(administracion$Promedio)
## [1] 89.605
graficar_histograma(administracion, "Administración")

# Arquitectura ----
arquitectura <- filter(datos, Promedio > 0 & Carrera == "ARQUITECTURA")
mean(arquitectura$Promedio)
## [1] 86.46481
median(arquitectura$Promedio)
## [1] 86.58
graficar_histograma(arquitectura, "Arquitectura")

# Bioquimica ----
bioquimica <- filter(datos, Promedio > 0 & Carrera == "BIOQUIMICA")
mean(bioquimica$Promedio)
## [1] 84.68143
median(bioquimica$Promedio)
## [1] 84.06
graficar_histograma(bioquimica, "Bioquimica")

# Civil ----
civil <- filter(datos, Promedio > 0 & Carrera == "CIVIL")
mean(civil$Promedio)
## [1] 84.281
median(civil$Promedio)
## [1] 83.915
graficar_histograma(civil, "Civil")

# Electrica ----
electrica <- filter(datos, Promedio > 0 & Carrera == "ELECTRICA")
mean(electrica$Promedio)
## [1] 83.77305
median(electrica$Promedio)
## [1] 83.24
graficar_histograma(electrica, "Electrica")

# Electronica ----
electronica <- filter(datos, Promedio > 0 & Carrera == "ELECTRONICA")
mean(electronica$Promedio)
## [1] 86.6572
median(electronica$Promedio)
## [1] 86.67
graficar_histograma(electronica, "Electronica")

# Gestion Empresarial ----
gestionempresarial <- filter(datos, Promedio > 0 & Carrera == "GESTION EMPRESARIAL")
mean(gestionempresarial$Promedio)
## [1] 87.66966
median(gestionempresarial$Promedio)
## [1] 87.59
graficar_histograma(gestionempresarial, "Gestion Empresarial")

# Industrial ----
industrial <- filter(datos, Promedio > 0 & Carrera == "INDUSTRIAL")
mean(industrial$Promedio)
## [1] 85.01737
median(industrial$Promedio)
## [1] 84.64
graficar_histograma(industrial, "Industrial")

# Informatica ----
informatica <- filter(datos, Promedio > 0 & Carrera == "INFORMATICA")
mean(informatica$Promedio)
## [1] 86.26577
median(informatica$Promedio)
## [1] 86.64
graficar_histograma(informatica, "Informatica")

# Mecanica ----
mecanica <- filter(datos, Promedio > 0 & Carrera == "MECANICA")
mean(mecanica$Promedio)
## [1] 82.58467
median(mecanica$Promedio)
## [1] 82.02
graficar_histograma(mecanica, "Mecanica")

# Mecatronica ----
mecatronica <- filter(datos, Promedio > 0 & Carrera == "MECATRONICA")
mean(mecatronica$Promedio)
## [1] 84.45948
median(mecatronica$Promedio)
## [1] 84.085
graficar_histograma(mecatronica, "Mecatronica")

# Quimica ----
quimica <- filter(datos, Promedio > 0 & Carrera == "QUIMICA")
mean(quimica$Promedio)
## [1] 86.05215
median(quimica$Promedio)
## [1] 85.67
graficar_histograma(quimica, "Quimica")

# Sistemas ----
sistemas <- filter(datos, Promedio > 0 & Carrera == "SISTEMAS")
mean(sistemas$Promedio)
## [1] 85.90464
median(sistemas$Promedio)
## [1] 85.34
graficar_histograma(sistemas, "Sistemas")

# TIC ----
tic <- filter(datos, Promedio > 0 & Carrera == "TIC")
mean(tic$Promedio)
## [1] 84.31719
median(tic$Promedio)
## [1] 83.24
graficar_histograma(tic, "Tic")
#' Scatter plot of `Promedio` against row position for one career.
#'
#' The subtitle reports the variance, the standard deviation and the
#' coefficient of variation (standard deviation / mean * 100).
#'
#' @param datos_carrera Data frame with a numeric `Promedio` column.
#' @param nombre Career name appended to the plot title.
#' @return A ggplot object; it is drawn when printed (which happens
#'   automatically when the call is evaluated at top level).
graficar_dispersion <- function(datos_carrera, nombre) {
  promedios <- datos_carrera$Promedio
  desv_std <- sd(promedios)
  # The x positions come from the plotted data itself instead of a shared
  # global counter: aesthetics are evaluated when the plot is rendered, so a
  # global that is reassigned between plots can go stale, and `1:n` is
  # c(1, 0) for an empty data frame.
  ggplot(datos_carrera, aes(x = seq_along(Promedio), y = Promedio)) +
    geom_point() +
    labs(
      title = paste("Dispersión de Promedio de", nombre),
      subtitle = paste("Varianza = ", round(var(promedios), 2),
                       ", DesvStd = ", round(desv_std, 2),
                       ", C.V. = ", round(desv_std / mean(promedios) * 100, 2)),
      # Explicit label so the axis does not show the mapped expression.
      x = "1:n"
    )
}

# One dispersion plot per career, using the filtered per-career data frames.
graficar_dispersion(administracion, "Administracion")
graficar_dispersion(arquitectura, "Arquitectura")
graficar_dispersion(bioquimica, "Bioquimica")
graficar_dispersion(civil, "Civil")
graficar_dispersion(electrica, "Electrica")
graficar_dispersion(electronica, "Electronica")
graficar_dispersion(gestionempresarial, "Gestion Empresarial")
graficar_dispersion(industrial, "Industrial")
graficar_dispersion(informatica, "Informatica")
graficar_dispersion(mecanica, "Mecánica")
graficar_dispersion(mecatronica, "Mecatronica")
graficar_dispersion(quimica, "Quimica")
graficar_dispersion(sistemas, "Sistemas")
graficar_dispersion(tic, "TIC")
# Per-career summary of Promedio: row count, mean, median, variance,
# standard deviation and coefficient of variation (in percent).
# NOTE(review): this summarises every row of `datos`; it does not apply the
# `Promedio > 0` filter used for the per-career subsets -- confirm intended.
tabla <- summarise(
  group_by(datos, Carrera),
  n = n(),
  media = mean(Promedio),
  mediana = median(Promedio),
  vari = var(Promedio),
  desvstd = sd(Promedio),
  cv = desvstd / media * 100
)
## `summarise()` ungrouping output (override with `.groups` argument)
tabla
## # A tibble: 14 x 7
## Carrera n media mediana vari desvstd cv
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ADMINISTRACION 497 74.5 88.4 1125. 33.5 45.0
## 2 ARQUITECTURA 675 70.1 85.4 1163. 34.1 48.7
## 3 BIOQUIMICA 441 68.6 82.8 1126. 33.6 48.9
## 4 CIVIL 648 73.1 83.1 834. 28.9 39.5
## 5 ELECTRICA 280 60.7 81.8 1414. 37.6 61.9
## 6 ELECTRONICA 161 67.3 85.3 1324. 36.4 54.1
## 7 GESTION EMPRESARIAL 585 74.2 86.7 1013. 31.8 42.9
## 8 INDUSTRIAL 707 74.2 83.7 819. 28.6 38.6
## 9 INFORMATICA 101 60.6 83.6 1581. 39.8 65.6
## 10 MECANICA 301 61.7 80.7 1302. 36.1 58.4
## 11 MECATRONICA 432 70.8 83.4 981. 31.3 44.3
## 12 QUIMICA 568 72.6 84.6 996. 31.6 43.5
## 13 SISTEMAS 452 70.9 84.1 1081. 32.9 46.4
## 14 TIC 81 66.6 81.7 1209. 34.8 52.2
De acuerdo con la gráfica de los promedios generales, informática es la carrera con el menor promedio y administración es la de mayor promedio. Según los histogramas de promedios, mecánica tiene la media (82.58) y la mediana (82.02) más bajas, mientras que administración tiene la media (89.44) y la mediana (89.6) más altas.